collection.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. """Data structures to hold collections of images, with optional caching."""
  2. import os
  3. from glob import glob
  4. import re
  5. from collections.abc import Sequence
  6. from copy import copy
  7. import numpy as np
  8. from PIL import Image
  9. from tifffile import TiffFile
# Names exported by ``from <module> import *``. ``alphanumeric_key`` and
# ``_is_multipattern`` are defined in this module but are not listed here.
__all__ = [
    'MultiImage',
    'ImageCollection',
    'concatenate_images',
    'imread_collection_wrapper',
]
  16. def concatenate_images(ic):
  17. """Concatenate all images in the image collection into an array.
  18. Parameters
  19. ----------
  20. ic : an iterable of images
  21. The images to be concatenated.
  22. Returns
  23. -------
  24. array_cat : ndarray
  25. An array having one more dimension than the images in `ic`.
  26. See Also
  27. --------
  28. ImageCollection.concatenate
  29. MultiImage.concatenate
  30. Raises
  31. ------
  32. ValueError
  33. If images in `ic` don't have identical shapes.
  34. Notes
  35. -----
  36. ``concatenate_images`` receives any iterable object containing images,
  37. including ImageCollection and MultiImage, and returns a NumPy array.
  38. """
  39. all_images = [image[np.newaxis, ...] for image in ic]
  40. try:
  41. array_cat = np.concatenate(all_images)
  42. except ValueError:
  43. raise ValueError('Image dimensions must agree.')
  44. return array_cat
  45. def alphanumeric_key(s):
  46. """Convert string to list of strings and ints that gives intuitive sorting.
  47. Parameters
  48. ----------
  49. s : string
  50. Returns
  51. -------
  52. k : a list of strings and ints
  53. Examples
  54. --------
  55. >>> alphanumeric_key('z23a')
  56. ['z', 23, 'a']
  57. >>> filenames = ['f9.10.png', 'e10.png', 'f9.9.png', 'f10.10.png',
  58. ... 'f10.9.png']
  59. >>> sorted(filenames)
  60. ['e10.png', 'f10.10.png', 'f10.9.png', 'f9.10.png', 'f9.9.png']
  61. >>> sorted(filenames, key=alphanumeric_key)
  62. ['e10.png', 'f9.9.png', 'f9.10.png', 'f10.9.png', 'f10.10.png']
  63. """
  64. k = [int(c) if c.isdigit() else c for c in re.split('([0-9]+)', s)]
  65. return k
  66. def _is_multipattern(input_pattern):
  67. """Helping function. Returns True if pattern contains a tuple, list, or a
  68. string separated with os.pathsep."""
  69. # Conditions to be accepted by ImageCollection:
  70. has_str_ospathsep = isinstance(input_pattern, str) and os.pathsep in input_pattern
  71. not_a_string = not isinstance(input_pattern, str)
  72. has_iterable = isinstance(input_pattern, Sequence)
  73. has_strings = all(isinstance(pat, str) for pat in input_pattern)
  74. is_multipattern = has_str_ospathsep or (
  75. not_a_string and has_iterable and has_strings
  76. )
  77. return is_multipattern
  78. class ImageCollection:
  79. """Load and manage a collection of image files.
  80. Parameters
  81. ----------
  82. load_pattern : str or list of str
  83. Pattern string or list of strings to load. The filename path can be
  84. absolute or relative.
  85. conserve_memory : bool, optional
  86. If True, :class:`skimage.io.ImageCollection` does not keep more than one in
  87. memory at a specific time. Otherwise, images will be cached once they are loaded.
  88. Other parameters
  89. ----------------
  90. load_func : callable
  91. ``imread`` by default. See Notes below.
  92. **load_func_kwargs : dict
  93. Any other keyword arguments are passed to `load_func`.
  94. Attributes
  95. ----------
  96. files : list of str
  97. If a pattern string is given for `load_pattern`, this attribute
  98. stores the expanded file list. Otherwise, this is equal to
  99. `load_pattern`.
  100. Notes
  101. -----
  102. Note that files are always returned in alphanumerical order. Also note that slicing
  103. returns a new :class:`skimage.io.ImageCollection`, *not* a view into the data.
  104. ImageCollection image loading can be customized through
  105. `load_func`. For an ImageCollection ``ic``, ``ic[5]`` calls
  106. ``load_func(load_pattern[5])`` to load that image.
  107. For example, here is an ImageCollection that, for each video provided,
  108. loads every second frame::
  109. import imageio.v3 as iio3
  110. import itertools
  111. def vidread_step(f, step):
  112. vid = iio3.imiter(f)
  113. return list(itertools.islice(vid, None, None, step)
  114. video_file = 'no_time_for_that_tiny.gif'
  115. ic = ImageCollection(video_file, load_func=vidread_step, step=2)
  116. ic # is an ImageCollection object of length 1 because 1 video is provided
  117. x = ic[0]
  118. x[5] # the 10th frame of the first video
  119. Alternatively, if `load_func` is provided and `load_pattern` is a
  120. sequence, an :class:`skimage.io.ImageCollection` of corresponding length will
  121. be created, and the individual images will be loaded by calling `load_func` with the
  122. matching element of the `load_pattern` as its first argument. In this
  123. case, the elements of the sequence do not need to be names of existing
  124. files (or strings at all). For example, to create an :class:`skimage.io.ImageCollection`
  125. containing 500 images from a video::
  126. class FrameReader:
  127. def __init__ (self, f):
  128. self.f = f
  129. def __call__ (self, index):
  130. return iio3.imread(self.f, index=index)
  131. ic = ImageCollection(range(500), load_func=FrameReader('movie.mp4'))
  132. ic # is an ImageCollection object of length 500
  133. Another use of `load_func` would be to convert all images to ``uint8``::
  134. def imread_convert(f):
  135. return imread(f).astype(np.uint8)
  136. ic = ImageCollection('/tmp/*.png', load_func=imread_convert)
  137. Examples
  138. --------
  139. >>> import imageio.v3 as iio3
  140. >>> import skimage.io as io
  141. # Where your images are located
  142. >>> data_dir = os.path.join(os.path.dirname(__file__), '../data')
  143. >>> coll = io.ImageCollection(data_dir + '/chess*.png')
  144. >>> len(coll)
  145. 2
  146. >>> coll[0].shape
  147. (200, 200)
  148. >>> image_col = io.ImageCollection([f'{data_dir}/*.png', '{data_dir}/*.jpg'])
  149. >>> class MultiReader:
  150. ... def __init__ (self, f):
  151. ... self.f = f
  152. ... def __call__ (self, index):
  153. ... return iio3.imread(self.f, index=index)
  154. ...
  155. >>> filename = data_dir + '/no_time_for_that_tiny.gif'
  156. >>> ic = io.ImageCollection(range(24), load_func=MultiReader(filename))
  157. >>> len(image_col)
  158. 23
  159. >>> isinstance(ic[0], np.ndarray)
  160. True
  161. """
  162. def __init__(
  163. self, load_pattern, conserve_memory=True, load_func=None, **load_func_kwargs
  164. ):
  165. """Load and manage a collection of images."""
  166. self._files = []
  167. if _is_multipattern(load_pattern):
  168. if isinstance(load_pattern, str):
  169. load_pattern = load_pattern.split(os.pathsep)
  170. for pattern in load_pattern:
  171. self._files.extend(glob(pattern))
  172. self._files = sorted(self._files, key=alphanumeric_key)
  173. elif isinstance(load_pattern, str):
  174. self._files.extend(glob(load_pattern))
  175. self._files = sorted(self._files, key=alphanumeric_key)
  176. elif isinstance(load_pattern, Sequence) and load_func is not None:
  177. self._files = list(load_pattern)
  178. else:
  179. raise TypeError('Invalid pattern as input.')
  180. if load_func is None:
  181. from ._io import imread
  182. self.load_func = imread
  183. self._numframes = self._find_images()
  184. else:
  185. self.load_func = load_func
  186. self._numframes = len(self._files)
  187. self._frame_index = None
  188. if conserve_memory:
  189. memory_slots = 1
  190. else:
  191. memory_slots = self._numframes
  192. self._conserve_memory = conserve_memory
  193. self._cached = None
  194. self.load_func_kwargs = load_func_kwargs
  195. self.data = np.empty(memory_slots, dtype=object)
  196. @property
  197. def files(self):
  198. return self._files
  199. @property
  200. def conserve_memory(self):
  201. return self._conserve_memory
  202. def _find_images(self):
  203. index = []
  204. for fname in self._files:
  205. if fname.lower().endswith(('.tiff', '.tif')):
  206. with open(fname, 'rb') as f:
  207. img = TiffFile(f)
  208. index += [(fname, i) for i in range(len(img.pages))]
  209. else:
  210. try:
  211. im = Image.open(fname)
  212. im.seek(0)
  213. except OSError:
  214. continue
  215. i = 0
  216. while True:
  217. try:
  218. im.seek(i)
  219. except EOFError:
  220. break
  221. index.append((fname, i))
  222. i += 1
  223. if hasattr(im, 'fp') and im.fp:
  224. im.fp.close()
  225. self._frame_index = index
  226. return len(index)
  227. def __getitem__(self, n):
  228. """Return selected image(s) in the collection.
  229. Loading is done on demand.
  230. Parameters
  231. ----------
  232. n : int or slice
  233. The image number to be returned, or a slice selecting the images
  234. and ordering to be returned in a new ImageCollection.
  235. Returns
  236. -------
  237. img : ndarray or :class:`skimage.io.ImageCollection`
  238. The `n`-th image in the collection, or a new ImageCollection with
  239. the selected images.
  240. """
  241. if hasattr(n, '__index__'):
  242. n = n.__index__()
  243. if not isinstance(n, (int, slice)):
  244. raise TypeError('slicing must be with an int or slice object')
  245. if isinstance(n, int):
  246. n = self._check_imgnum(n)
  247. idx = n % len(self.data)
  248. if (self.conserve_memory and n != self._cached) or (self.data[idx] is None):
  249. kwargs = self.load_func_kwargs
  250. if self._frame_index:
  251. fname, img_num = self._frame_index[n]
  252. if img_num is not None:
  253. kwargs['img_num'] = img_num
  254. try:
  255. self.data[idx] = self.load_func(fname, **kwargs)
  256. # Account for functions that do not accept an img_num kwarg
  257. except TypeError as e:
  258. if "unexpected keyword argument 'img_num'" in str(e):
  259. del kwargs['img_num']
  260. self.data[idx] = self.load_func(fname, **kwargs)
  261. else:
  262. raise
  263. else:
  264. self.data[idx] = self.load_func(self.files[n], **kwargs)
  265. self._cached = n
  266. return self.data[idx]
  267. else:
  268. # A slice object was provided, so create a new ImageCollection
  269. # object. Any loaded image data in the original ImageCollection
  270. # will be copied by reference to the new object. Image data
  271. # loaded after this creation is not linked.
  272. fidx = range(self._numframes)[n]
  273. new_ic = copy(self)
  274. if self._frame_index:
  275. new_ic._files = [self._frame_index[i][0] for i in fidx]
  276. new_ic._frame_index = [self._frame_index[i] for i in fidx]
  277. else:
  278. new_ic._files = [self._files[i] for i in fidx]
  279. new_ic._numframes = len(fidx)
  280. if self.conserve_memory:
  281. if self._cached in fidx:
  282. new_ic._cached = fidx.index(self._cached)
  283. new_ic.data = np.copy(self.data)
  284. else:
  285. new_ic.data = np.empty(1, dtype=object)
  286. else:
  287. new_ic.data = self.data[fidx]
  288. return new_ic
  289. def _check_imgnum(self, n):
  290. """Check that the given image number is valid."""
  291. num = self._numframes
  292. if -num <= n < num:
  293. n = n % num
  294. else:
  295. raise IndexError(f"There are only {num} images in the collection")
  296. return n
  297. def __iter__(self):
  298. """Iterate over the images."""
  299. for i in range(len(self)):
  300. yield self[i]
  301. def __len__(self):
  302. """Number of images in collection."""
  303. return self._numframes
  304. def __str__(self):
  305. return str(self.files)
  306. def reload(self, n=None):
  307. """Clear the image cache.
  308. Parameters
  309. ----------
  310. n : None or int
  311. Clear the cache for this image only. By default, the
  312. entire cache is erased.
  313. """
  314. self.data = np.empty_like(self.data)
  315. def concatenate(self):
  316. """Concatenate all images in the collection into an array.
  317. Returns
  318. -------
  319. ar : np.ndarray
  320. An array having one more dimension than the images in `self`.
  321. See Also
  322. --------
  323. skimage.io.concatenate_images
  324. Raises
  325. ------
  326. ValueError
  327. If images in the :class:`skimage.io.ImageCollection` do not have identical
  328. shapes.
  329. """
  330. return concatenate_images(self)
  331. def imread_collection_wrapper(imread):
  332. def imread_collection(load_pattern, conserve_memory=True):
  333. """Return an `ImageCollection` from files matching the given pattern.
  334. Note that files are always stored in alphabetical order. Also note that
  335. slicing returns a new ImageCollection, *not* a view into the data.
  336. See `skimage.io.ImageCollection` for details.
  337. Parameters
  338. ----------
  339. load_pattern : str or list
  340. Pattern glob or filenames to load. The path can be absolute or
  341. relative. Multiple patterns should be separated by a colon,
  342. e.g. ``/tmp/work/*.png:/tmp/other/*.jpg``. Also see
  343. implementation notes below.
  344. conserve_memory : bool, optional
  345. If True, never keep more than one in memory at a specific
  346. time. Otherwise, images will be cached once they are loaded.
  347. """
  348. return ImageCollection(
  349. load_pattern, conserve_memory=conserve_memory, load_func=imread
  350. )
  351. return imread_collection
  352. class MultiImage(ImageCollection):
  353. """A class containing all frames from multi-frame TIFF images.
  354. Parameters
  355. ----------
  356. load_pattern : str or list of str
  357. Pattern glob or filenames to load. The path can be absolute or
  358. relative.
  359. conserve_memory : bool, optional
  360. Whether to conserve memory by only caching the frames of a single
  361. image. Default is True.
  362. Notes
  363. -----
  364. `MultiImage` returns a list of image-data arrays. In this
  365. regard, it is very similar to `ImageCollection`, but the two differ in
  366. their treatment of multi-frame images.
  367. For a TIFF image containing N frames of size WxH, `MultiImage` stores
  368. all frames of that image as a single element of shape `(N, W, H)` in the
  369. list. `ImageCollection` instead creates N elements of shape `(W, H)`.
  370. For an animated GIF image, `MultiImage` reads only the first frame, while
  371. `ImageCollection` reads all frames by default.
  372. Examples
  373. --------
  374. # Where your images are located
  375. >>> data_dir = os.path.join(os.path.dirname(__file__), '../data')
  376. >>> multipage_tiff = data_dir + '/multipage.tif'
  377. >>> multi_img = MultiImage(multipage_tiff)
  378. >>> len(multi_img) # multi_img contains one element
  379. 1
  380. >>> multi_img[0].shape # this element is a two-frame image of shape:
  381. (2, 15, 10)
  382. >>> image_col = ImageCollection(multipage_tiff)
  383. >>> len(image_col) # image_col contains two elements
  384. 2
  385. >>> for frame in image_col:
  386. ... print(frame.shape) # each element is a frame of shape (15, 10)
  387. ...
  388. (15, 10)
  389. (15, 10)
  390. """
  391. def __init__(self, filename, conserve_memory=True, dtype=None, **imread_kwargs):
  392. """Load a multi-img."""
  393. from ._io import imread
  394. self._filename = filename
  395. super().__init__(filename, conserve_memory, load_func=imread, **imread_kwargs)
  396. @property
  397. def filename(self):
  398. return self._filename