zarr.py 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603
  1. # tifffile/zarr.py
  2. # Copyright (c) 2008-2025, Christoph Gohlke
  3. # All rights reserved.
  4. #
  5. # Redistribution and use in source and binary forms, with or without
  6. # modification, are permitted provided that the following conditions are met:
  7. #
  8. # 1. Redistributions of source code must retain the above copyright notice,
  9. # this list of conditions and the following disclaimer.
  10. #
  11. # 2. Redistributions in binary form must reproduce the above copyright notice,
  12. # this list of conditions and the following disclaimer in the documentation
  13. # and/or other materials provided with the distribution.
  14. #
  15. # 3. Neither the name of the copyright holder nor the names of its
  16. # contributors may be used to endorse or promote products derived from
  17. # this software without specific prior written permission.
  18. #
  19. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  23. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29. # POSSIBILITY OF SUCH DAMAGE.
  30. """Zarr 3 TIFF and file sequence stores."""
  31. from __future__ import annotations
  32. __all__ = ['ZarrFileSequenceStore', 'ZarrStore', 'ZarrTiffStore']
  33. import asyncio
  34. import json
  35. import sys
  36. from typing import TYPE_CHECKING
  37. import numpy
  38. import zarr
  39. try:
  40. from zarr.abc.store import ByteRequest, Store
  41. from zarr.core.buffer.cpu import NDBuffer
  42. from zarr.core.chunk_grids import RegularChunkGrid
  43. except ImportError as exc:
  44. raise ValueError(f'zarr {zarr.__version__} < 3 is not supported') from exc
  45. from .tifffile import (
  46. CHUNKMODE,
  47. COMPRESSION,
  48. FileCache,
  49. FileSequence,
  50. NullContext,
  51. TiffFrame,
  52. TiffPage,
  53. TiffPageSeries,
  54. TiledSequence,
  55. create_output,
  56. enumarg,
  57. imread,
  58. jpeg_decode_colorspace,
  59. product,
  60. )
  61. if TYPE_CHECKING:
  62. import os
  63. import threading
  64. from collections.abc import (
  65. AsyncIterator,
  66. Callable,
  67. Iterable,
  68. Iterator,
  69. Sequence,
  70. )
  71. from typing import Any, TextIO
  72. from numpy.typing import DTypeLike, NDArray
  73. from zarr.core.buffer import Buffer, BufferPrototype
  74. from zarr.core.indexing import BasicSelection
  75. from .tifffile import ByteOrder, OutputType
  76. class ZarrStore(Store):
  77. """Zarr 3 store base class.
  78. Parameters:
  79. fillvalue:
  80. Value to use for missing chunks of Zarr store.
  81. The default is 0.
  82. chunkmode:
  83. Specifies how to chunk data.
  84. read_only:
  85. Passed to :py:class:`zarr.abc.store.Store`.
  86. References:
  87. 1. https://zarr.readthedocs.io/en/stable/api/zarr/abc/store/
  88. 2. https://zarr.readthedocs.io/en/stable/spec/v2.html
  89. 3. https://forum.image.sc/t/multiscale-arrays-v0-1/37930
  90. """
  91. _read_only: bool
  92. _store: dict[str, Any]
  93. _fillvalue: float
  94. _chunkmode: int
  95. def __init__(
  96. self,
  97. /,
  98. *,
  99. fillvalue: float | None = None,
  100. chunkmode: CHUNKMODE | int | str | None = None,
  101. read_only: bool = True,
  102. ) -> None:
  103. super().__init__(read_only=read_only)
  104. self._store = {}
  105. self._fillvalue = 0 if fillvalue is None else fillvalue
  106. if chunkmode is None:
  107. self._chunkmode = CHUNKMODE(0)
  108. else:
  109. self._chunkmode = enumarg(CHUNKMODE, chunkmode)
  110. def __hash__(self) -> int:
  111. return hash((self._store.items(), self._fillvalue, self._chunkmode))
  112. def __eq__(self, other: object) -> bool:
  113. """Return whether objects are equal."""
  114. return (
  115. isinstance(other, type(self))
  116. and self._store == other._store
  117. and self._fillvalue == other._fillvalue
  118. and self._chunkmode == other._chunkmode
  119. )
  120. async def get_partial_values(
  121. self,
  122. prototype: BufferPrototype,
  123. key_ranges: Iterable[tuple[str, ByteRequest | None]],
  124. ) -> list[Buffer | None]:
  125. """Return possibly partial values from given key_ranges."""
  126. # print(f'get_partial_values({key_ranges=})')
  127. return [
  128. await self.get(key, prototype, byte_range)
  129. for key, byte_range in key_ranges
  130. ]
  131. @property
  132. def supports_writes(self) -> bool:
  133. """Store supports writes."""
  134. return not self._read_only
  135. def _set(self, key: str, value: Buffer, /) -> None:
  136. """Store (key, value) pair."""
  137. raise NotImplementedError
  138. async def set(self, key: str, value: Buffer) -> None:
  139. """Store (key, value) pair."""
  140. self._set(key, value)
  141. @property
  142. def supports_deletes(self) -> bool:
  143. """Store supports deletes."""
  144. return False
  145. async def delete(self, key: str) -> None:
  146. """Remove key from store."""
  147. raise PermissionError('ZarrStore does not support deletes')
  148. @property
  149. def supports_listing(self) -> bool:
  150. """Store supports listing."""
  151. return True
  152. async def list(self) -> AsyncIterator[str]:
  153. """Return all keys in store."""
  154. for key in self._store:
  155. yield key
  156. async def list_prefix(self, prefix: str) -> AsyncIterator[str]:
  157. """Return all keys in store that begin with prefix.
  158. Keys are returned relative to the root of the store.
  159. """
  160. for key in list(self._store):
  161. if key.startswith(prefix):
  162. yield key
  163. async def list_dir(self, prefix: str) -> AsyncIterator[str]:
  164. """Return all keys and prefixes with prefix.
  165. Keys and prefixes do not contain the character "/" after the given
  166. prefix.
  167. """
  168. prefix = prefix.rstrip('/')
  169. if prefix == '':
  170. keys_unique = {k.split('/')[0] for k in self._store}
  171. else:
  172. keys_unique = {
  173. key.removeprefix(prefix + '/').split('/')[0]
  174. for key in self._store
  175. if key.startswith(prefix + '/') and key != prefix
  176. }
  177. for key in keys_unique:
  178. yield key
  179. @property
  180. def is_multiscales(self) -> bool:
  181. """Return whether ZarrStore contains multiscales."""
  182. return b'multiscales' in self._store['.zattrs']
  183. def __repr__(self) -> str:
  184. return f'{self.__class__.__name__}'
  185. # async def _get_many(
  186. # self,
  187. # requests: Iterable[tuple[str, BufferPrototype, ByteRequest | None]]
  188. # ) -> AsyncGenerator[tuple[str, Buffer | None], None]:
  189. # print(f'_get_many({requests=})')
  190. # return super()._get_many(requests)
  191. # async def getsize_prefix(self, prefix: str) -> int:
  192. # print(f'getsize_prefix({prefix=})')
  193. # return super().getsize_prefix(prefix)
  194. class ZarrTiffStore(ZarrStore):
  195. """Zarr 3 store interface to image array in TiffPage or TiffPageSeries.
  196. ZarrTiffStore is using a TiffFile instance for reading and decoding chunks.
  197. Therefore, ZarrTiffStore instances cannot be pickled.
  198. For writing, image data must be stored in uncompressed, unpredicted,
  199. and unpacked form. Sparse strips and tiles are not written.
  200. Parameters:
  201. arg:
  202. TIFF page or series to wrap as Zarr store.
  203. level:
  204. Pyramidal level to wrap. The default is 0.
  205. chunkmode:
  206. Use strips or tiles (0) or whole page data (2) as chunks.
  207. The default is 0.
  208. fillvalue:
  209. Value to use for missing chunks. The default is 0.
  210. zattrs:
  211. Additional attributes to store in `.zattrs`.
  212. multiscales:
  213. Create a multiscales-compatible Zarr group store.
  214. By default, create a Zarr array store for pages and non-pyramidal
  215. series.
  216. lock:
  217. Reentrant lock to synchronize seeks and reads from file.
  218. By default, the lock of the parent's file handle is used.
  219. squeeze:
  220. Remove length-1 dimensions from shape of TiffPageSeries.
  221. maxworkers:
  222. If `chunkmode=0`, asynchronously run chunk decode function
  223. in separate thread if greater than 1.
  224. If `chunkmode=2`, maximum number of threads to concurrently decode
  225. strips or tiles.
  226. If *None* or *0*, use up to :py:attr:`_TIFF.MAXWORKERS` or
  227. asyncio assigned threads.
  228. buffersize:
  229. Approximate number of bytes to read from file in one pass
  230. if `chunkmode=2`. The default is :py:attr:`_TIFF.BUFFERSIZE`.
  231. read_only:
  232. Passed to :py:class:`zarr.abc.store.Store`.
  233. _openfiles:
  234. Internal API.
  235. """
  236. _data: list[TiffPageSeries]
  237. _filecache: FileCache
  238. _transform: Callable[[NDArray[Any]], NDArray[Any]] | None
  239. _maxworkers: int
  240. _buffersize: int | None
  241. _squeeze: bool | None
  242. _multiscales: bool
  243. def __init__(
  244. self,
  245. arg: TiffPage | TiffFrame | TiffPageSeries,
  246. /,
  247. *,
  248. level: int | None = None,
  249. chunkmode: CHUNKMODE | int | str | None = None,
  250. fillvalue: float | None = None,
  251. zattrs: dict[str, Any] | None = None,
  252. multiscales: bool | None = None,
  253. lock: threading.RLock | NullContext | None = None,
  254. squeeze: bool | None = None,
  255. maxworkers: int | None = None,
  256. buffersize: int | None = None,
  257. read_only: bool | None = None,
  258. _openfiles: int | None = None,
  259. **kwargs: Any,
  260. ) -> None:
  261. if chunkmode is None:
  262. self._chunkmode = CHUNKMODE(0)
  263. else:
  264. self._chunkmode = enumarg(CHUNKMODE, chunkmode)
  265. if self._chunkmode not in {0, 2}:
  266. raise NotImplementedError(f'{self._chunkmode!r} not implemented')
  267. self._squeeze = None if squeeze is None else bool(squeeze)
  268. self._buffersize = buffersize
  269. if isinstance(arg, TiffPageSeries):
  270. self._data = arg.levels
  271. self._transform = arg.transform
  272. if multiscales is not None and not multiscales:
  273. level = 0
  274. if level is not None:
  275. self._data = [self._data[level]]
  276. name = arg.name
  277. else:
  278. self._data = [TiffPageSeries([arg])]
  279. self._transform = None
  280. name = 'Unnamed'
  281. if not maxworkers:
  282. maxworkers = self._data[0].keyframe.maxworkers
  283. if maxworkers < 3 and self._chunkmode == 0:
  284. maxworkers = 1
  285. self._maxworkers = maxworkers
  286. fh = self._data[0].keyframe.parent._parent.filehandle
  287. if read_only is None:
  288. read_only = not fh.writable() or self._chunkmode != 0
  289. super().__init__(
  290. fillvalue=fillvalue, chunkmode=self._chunkmode, read_only=read_only
  291. )
  292. if lock is None:
  293. fh.set_lock(True)
  294. lock = fh.lock
  295. self._filecache = FileCache(size=_openfiles, lock=lock)
  296. zattrs = {} if zattrs is None else dict(zattrs)
  297. # TODO: Zarr Encoding Specification
  298. # https://xarray.pydata.org/en/stable/internals/zarr-encoding-spec.html
  299. if multiscales or len(self._data) > 1:
  300. # multiscales
  301. self._multiscales = True
  302. if '_ARRAY_DIMENSIONS' in zattrs:
  303. array_dimensions = zattrs.pop('_ARRAY_DIMENSIONS')
  304. else:
  305. array_dimensions = list(self._data[0].get_axes(squeeze))
  306. self._store['.zgroup'] = _json_dumps({'zarr_format': 2})
  307. self._store['.zattrs'] = _json_dumps(
  308. {
  309. # TODO: use https://ngff.openmicroscopy.org/latest/
  310. 'multiscales': [
  311. {
  312. 'version': '0.1',
  313. 'name': name,
  314. 'datasets': [
  315. {'path': str(i)}
  316. for i in range(len(self._data))
  317. ],
  318. # 'axes': [...]
  319. # 'type': 'unknown',
  320. 'metadata': {},
  321. }
  322. ],
  323. **zattrs,
  324. }
  325. )
  326. shape0 = self._data[0].get_shape(squeeze)
  327. for level, series in enumerate(self._data):
  328. keyframe = series.keyframe
  329. keyframe.decode # noqa: B018 - cache decode function
  330. shape = series.get_shape(squeeze)
  331. dtype = series.dtype
  332. if fillvalue is None:
  333. self._fillvalue = fillvalue = keyframe.nodata
  334. chunks = keyframe.shape if self._chunkmode else keyframe.chunks
  335. self._store[f'{level}/.zattrs'] = _json_dumps(
  336. {
  337. '_ARRAY_DIMENSIONS': [
  338. (f'{ax}{level}' if i != j else ax)
  339. for ax, i, j in zip(
  340. array_dimensions, shape, shape0, strict=True
  341. )
  342. ]
  343. }
  344. )
  345. self._store[f'{level}/.zarray'] = _json_dumps(
  346. {
  347. 'zarr_format': 2,
  348. 'shape': shape,
  349. 'chunks': _chunks(chunks, shape, keyframe.shaped),
  350. 'dtype': _dtype_str(dtype),
  351. 'compressor': None,
  352. 'fill_value': _json_value(fillvalue, dtype),
  353. 'order': 'C',
  354. 'filters': None,
  355. }
  356. )
  357. if not self._read_only:
  358. self._read_only = not _is_writable(keyframe)
  359. else:
  360. self._multiscales = False
  361. series = self._data[0]
  362. keyframe = series.keyframe
  363. keyframe.decode # noqa: B018 - cache decode function
  364. shape = series.get_shape(squeeze)
  365. dtype = series.dtype
  366. if fillvalue is None:
  367. self._fillvalue = fillvalue = keyframe.nodata
  368. chunks = keyframe.shape if self._chunkmode else keyframe.chunks
  369. if '_ARRAY_DIMENSIONS' not in zattrs:
  370. zattrs['_ARRAY_DIMENSIONS'] = list(series.get_axes(squeeze))
  371. self._store['.zattrs'] = _json_dumps(zattrs)
  372. self._store['.zarray'] = _json_dumps(
  373. {
  374. 'zarr_format': 2,
  375. 'shape': shape,
  376. 'chunks': _chunks(chunks, shape, keyframe.shaped),
  377. 'dtype': _dtype_str(dtype),
  378. 'compressor': None,
  379. 'fill_value': _json_value(fillvalue, dtype),
  380. 'order': 'C',
  381. 'filters': None,
  382. }
  383. )
  384. if not self._read_only:
  385. self._read_only = not _is_writable(keyframe)
    def close(self) -> None:
        """Close store and clear the file cache."""
        super().close()
        # presumably releases file handles held open by the cache
        self._filecache.clear()
  390. def write_fsspec(
  391. self,
  392. jsonfile: str | os.PathLike[Any] | TextIO,
  393. /,
  394. url: str | None,
  395. *,
  396. groupname: str | None = None,
  397. templatename: str | None = None,
  398. compressors: dict[COMPRESSION | int, str | None] | None = None,
  399. version: int | None = None,
  400. _shape: Sequence[int] | None = None,
  401. _axes: Sequence[str] | None = None,
  402. _index: Sequence[int] | None = None,
  403. _append: bool = False,
  404. _close: bool = True,
  405. ) -> None:
  406. """Write fsspec ReferenceFileSystem as JSON to file.
  407. Parameters:
  408. jsonfile:
  409. Name or open file handle of output JSON file.
  410. url:
  411. Remote location of TIFF file(s) without file name(s).
  412. groupname:
  413. Zarr group name.
  414. templatename:
  415. Version 1 URL template name. The default is 'u'.
  416. compressors:
  417. Mapping of :py:class:`COMPRESSION` codes to Numcodecs codec
  418. names.
  419. version:
  420. Version of fsspec file to write. The default is 0.
  421. _shape:
  422. Shape of file sequence (experimental).
  423. _axes:
  424. Axes of file sequence (experimental).
  425. _index
  426. Index of file in sequence (experimental).
  427. _append:
  428. If *True*, only write index keys and values (experimental).
  429. _close:
  430. If *True*, no more appends (experimental).
  431. Raises:
  432. ValueError:
  433. ZarrTiffStore cannot be represented as ReferenceFileSystem
  434. due to features that are not supported by Zarr, Numcodecs,
  435. or Imagecodecs:
  436. - compressors, such as CCITT
  437. - filters, such as bitorder reversal, packed integers
  438. - dtypes, such as float24, complex integers
  439. - JPEGTables in multi-page series
  440. - incomplete chunks, such as `imagelength % rowsperstrip != 0`
  441. Files containing incomplete tiles may fail at runtime.
  442. Notes:
  443. Parameters `_shape`, `_axes`, `_index`, `_append`, and `_close`
  444. are an experimental API for joining the ReferenceFileSystems of
  445. multiple files of a TiffSequence.
  446. References:
  447. - `fsspec ReferenceFileSystem format
  448. <https://github.com/fsspec/kerchunk>`_
  449. """
  450. compressors = {
  451. 1: None,
  452. 8: 'zlib',
  453. 32946: 'zlib',
  454. 34925: 'lzma',
  455. 50013: 'zlib', # pixtiff
  456. 5: 'imagecodecs_lzw',
  457. 7: 'imagecodecs_jpeg',
  458. 22610: 'imagecodecs_jpegxr',
  459. 32773: 'imagecodecs_packbits',
  460. 33003: 'imagecodecs_jpeg2k',
  461. 33004: 'imagecodecs_jpeg2k',
  462. 33005: 'imagecodecs_jpeg2k',
  463. 33007: 'imagecodecs_jpeg',
  464. 34712: 'imagecodecs_jpeg2k',
  465. 34887: 'imagecodecs_lerc',
  466. 34892: 'imagecodecs_jpeg',
  467. 34933: 'imagecodecs_png',
  468. 34934: 'imagecodecs_jpegxr',
  469. 48124: 'imagecodecs_jetraw',
  470. 50000: 'imagecodecs_zstd', # numcodecs.zstd fails w/ unknown sizes
  471. 50001: 'imagecodecs_webp',
  472. 50002: 'imagecodecs_jpegxl',
  473. 52546: 'imagecodecs_jpegxl',
  474. **({} if compressors is None else compressors),
  475. }
  476. for series in self._data:
  477. errormsg = ' not supported by the fsspec ReferenceFileSystem'
  478. keyframe = series.keyframe
  479. if (
  480. keyframe.compression in {65000, 65001, 65002}
  481. and keyframe.parent.is_eer
  482. ):
  483. compressors[keyframe.compression] = 'imagecodecs_eer'
  484. if keyframe.compression not in compressors:
  485. raise ValueError(f'{keyframe.compression!r} is' + errormsg)
  486. if keyframe.fillorder != 1:
  487. raise ValueError(f'{keyframe.fillorder!r} is' + errormsg)
  488. if keyframe.sampleformat not in {1, 2, 3, 6}:
  489. # TODO: support float24 and cint via filters?
  490. raise ValueError(f'{keyframe.sampleformat!r} is' + errormsg)
  491. if (
  492. keyframe.bitspersample
  493. not in {
  494. 8,
  495. 16,
  496. 32,
  497. 64,
  498. 128,
  499. }
  500. and keyframe.compression
  501. not in {
  502. # JPEG
  503. 7,
  504. 33007,
  505. 34892,
  506. }
  507. and compressors[keyframe.compression] != 'imagecodecs_eer'
  508. ):
  509. raise ValueError(
  510. f'BitsPerSample {keyframe.bitspersample} is' + errormsg
  511. )
  512. if (
  513. not self._chunkmode
  514. and not keyframe.is_tiled
  515. and keyframe.imagelength % keyframe.rowsperstrip
  516. ):
  517. raise ValueError('incomplete chunks are' + errormsg)
  518. if self._chunkmode and not keyframe.is_final:
  519. raise ValueError(f'{self._chunkmode!r} is' + errormsg)
  520. if keyframe.jpegtables is not None and len(series.pages) > 1:
  521. raise ValueError(
  522. 'JPEGTables in multi-page files are' + errormsg
  523. )
  524. if url is None:
  525. url = ''
  526. elif url and url[-1] != '/':
  527. url += '/'
  528. url = url.replace('\\', '/')
  529. if groupname is None:
  530. groupname = ''
  531. elif groupname and groupname[-1] != '/':
  532. groupname += '/'
  533. byteorder: ByteOrder | None = '<' if sys.byteorder == 'big' else '>'
  534. if (
  535. self._data[0].keyframe.parent.byteorder != byteorder
  536. or self._data[0].keyframe.dtype is None
  537. or self._data[0].keyframe.dtype.itemsize == 1
  538. ):
  539. byteorder = None
  540. index: str
  541. _shape = [] if _shape is None else list(_shape)
  542. _axes = [] if _axes is None else list(_axes)
  543. if len(_shape) != len(_axes):
  544. raise ValueError('len(_shape) != len(_axes)')
  545. if _index is None:
  546. index = ''
  547. elif len(_shape) != len(_index):
  548. raise ValueError('len(_shape) != len(_index)')
  549. elif _index:
  550. index = '.'.join(str(i) for i in _index)
  551. index += '.'
  552. refs: dict[str, Any] = {}
  553. refzarr: dict[str, Any]
  554. if version == 1:
  555. if _append:
  556. raise ValueError('cannot append to version 1')
  557. if templatename is None:
  558. templatename = 'u'
  559. refs['version'] = 1
  560. refs['templates'] = {}
  561. refs['gen'] = []
  562. templates = {}
  563. if self._data[0].is_multifile:
  564. i = 0
  565. for page in self._data[0].pages:
  566. if page is None or page.keyframe is None:
  567. continue
  568. fname = page.keyframe.parent.filehandle.name
  569. if fname in templates:
  570. continue
  571. key = f'{templatename}{i}'
  572. templates[fname] = f'{{{{{key}}}}}'
  573. refs['templates'][key] = url + fname
  574. i += 1
  575. else:
  576. fname = self._data[0].keyframe.parent.filehandle.name
  577. key = f'{templatename}'
  578. templates[fname] = f'{{{{{key}}}}}'
  579. refs['templates'][key] = url + fname
  580. refs['refs'] = refzarr = {}
  581. else:
  582. refzarr = refs
  583. if not _append:
  584. if groupname:
  585. # TODO: support nested groups
  586. refzarr['.zgroup'] = _json_dumps({'zarr_format': 2}).decode()
  587. for item in self._store.items():
  588. key, value = item
  589. if '.zattrs' in key and _axes:
  590. value = json.loads(value)
  591. if '_ARRAY_DIMENSIONS' in value:
  592. value['_ARRAY_DIMENSIONS'] = (
  593. _axes + value['_ARRAY_DIMENSIONS']
  594. )
  595. value = _json_dumps(value)
  596. elif '.zarray' in key:
  597. value = json.loads(value)
  598. level = int(key.split('/')[0]) if '/' in key else 0
  599. keyframe = self._data[level].keyframe
  600. if _shape:
  601. value['shape'] = _shape + value['shape']
  602. value['chunks'] = [1] * len(_shape) + value['chunks']
  603. codec_id = compressors[keyframe.compression]
  604. if codec_id == 'imagecodecs_jpeg':
  605. # TODO: handle JPEG color spaces
  606. jpegtables = keyframe.jpegtables
  607. if jpegtables is None:
  608. tables = None
  609. else:
  610. import base64
  611. tables = base64.b64encode(jpegtables).decode()
  612. jpegheader = keyframe.jpegheader
  613. if jpegheader is None:
  614. header = None
  615. else:
  616. import base64
  617. header = base64.b64encode(jpegheader).decode()
  618. (
  619. colorspace_jpeg,
  620. colorspace_data,
  621. ) = jpeg_decode_colorspace(
  622. keyframe.photometric,
  623. keyframe.planarconfig,
  624. keyframe.extrasamples,
  625. keyframe.is_jfif,
  626. )
  627. value['compressor'] = {
  628. 'id': codec_id,
  629. 'tables': tables,
  630. 'header': header,
  631. 'bitspersample': keyframe.bitspersample,
  632. 'colorspace_jpeg': colorspace_jpeg,
  633. 'colorspace_data': colorspace_data,
  634. }
  635. elif (
  636. codec_id == 'imagecodecs_webp'
  637. and keyframe.samplesperpixel == 4
  638. ):
  639. value['compressor'] = {
  640. 'id': codec_id,
  641. 'hasalpha': True,
  642. }
  643. elif codec_id == 'imagecodecs_eer':
  644. horzbits = vertbits = 2
  645. if keyframe.compression == 65002:
  646. skipbits = int(keyframe.tags.valueof(65007, 7))
  647. horzbits = int(keyframe.tags.valueof(65008, 2))
  648. vertbits = int(keyframe.tags.valueof(65009, 2))
  649. elif keyframe.compression == 65001:
  650. skipbits = 7
  651. else:
  652. skipbits = 8
  653. value['compressor'] = {
  654. 'id': codec_id,
  655. 'shape': keyframe.chunks,
  656. 'skipbits': skipbits,
  657. 'horzbits': horzbits,
  658. 'vertbits': vertbits,
  659. 'superres': keyframe.parent._superres,
  660. }
  661. elif codec_id is not None:
  662. value['compressor'] = {'id': codec_id}
  663. if byteorder is not None:
  664. value['dtype'] = byteorder + value['dtype'][1:]
  665. if keyframe.predictor > 1:
  666. # predictors need access to chunk shape and dtype
  667. # requires imagecodecs > 2021.8.26 to read
  668. if keyframe.predictor in {2, 34892, 34893}:
  669. filter_id = 'imagecodecs_delta'
  670. else:
  671. filter_id = 'imagecodecs_floatpred'
  672. if keyframe.predictor <= 3:
  673. dist = 1
  674. elif keyframe.predictor in {34892, 34894}:
  675. dist = 2
  676. else:
  677. dist = 4
  678. if (
  679. keyframe.planarconfig == 1
  680. and keyframe.samplesperpixel > 1
  681. ):
  682. axis = -2
  683. else:
  684. axis = -1
  685. value['filters'] = [
  686. {
  687. 'id': filter_id,
  688. 'axis': axis,
  689. 'dist': dist,
  690. 'shape': value['chunks'],
  691. 'dtype': value['dtype'],
  692. }
  693. ]
  694. value = _json_dumps(value)
  695. # else:
  696. # pass through value
  697. refzarr[groupname + key] = value.decode()
  698. fh: TextIO
  699. if hasattr(jsonfile, 'write'):
  700. fh = jsonfile # type: ignore[assignment]
  701. else:
  702. fh = open(jsonfile, 'w', encoding='utf-8') # noqa: SIM115
  703. if version == 1:
  704. fh.write(json.dumps(refs, indent=1).rsplit('}"', 1)[0] + '}"')
  705. indent = ' '
  706. elif _append:
  707. indent = ' '
  708. else:
  709. fh.write(json.dumps(refs, indent=1)[:-2])
  710. indent = ' '
  711. offset: int | None
  712. for item in self._store.items():
  713. key, value = item
  714. if '.zarray' in key:
  715. value = json.loads(value)
  716. shape = value['shape']
  717. chunks = value['chunks']
  718. levelstr = (key.split('/')[0] + '/') if '/' in key else ''
  719. for chunkindex in _ndindex(shape, chunks):
  720. key = levelstr + chunkindex
  721. keyframe, page, _, offset, bytecount = self._parse_key(key)
  722. if page and self._chunkmode and offset is None:
  723. offset = page.dataoffsets[0]
  724. bytecount = keyframe.nbytes
  725. if offset and bytecount:
  726. fname = keyframe.parent.filehandle.name
  727. if version == 1:
  728. fname = templates[fname]
  729. else:
  730. fname = f'{url}{fname}'
  731. fh.write(
  732. f',\n{indent}"{groupname}{key}": '
  733. f'["{fname}", {offset}, {bytecount}]'
  734. )
  735. # TODO: support nested groups
  736. if version == 1:
  737. fh.write('\n }\n}')
  738. elif _close:
  739. fh.write('\n}')
  740. if not hasattr(jsonfile, 'write'):
  741. fh.close()
  async def get(
      self,
      key: str,
      prototype: BufferPrototype,
      byte_range: ByteRequest | None = None,
  ) -> Buffer | None:
      """Return value associated with key.

      Items held in the in-memory store are returned verbatim.
      Any other key is parsed as a chunk index; the chunk is read from
      file, decoded, optionally transformed, and returned as a flat
      buffer of bytes.

      Parameters:
          key:
              Name of stored item or chunk index string.
          prototype:
              Factory used to create the returned buffer.
          byte_range:
              Partial reads are not supported. Must be None.

      Returns:
          Buffer containing stored item or decoded chunk.
          None if key is a missing metadata item or the chunk has no
          page, a zero offset, or a zero byte count.

      Raises:
          NotImplementedError:
              If `byte_range` is not None.
          RuntimeError:
              If size of decoded chunk does not match chunk shape.

      """
      # print(f'get({key=}, {byte_range=})')
      if byte_range is not None:
          raise NotImplementedError(f'{byte_range=!r} not supported')

      if key in self._store:
          return prototype.buffer.from_bytes(self._store[key])

      if (
          key == 'zarr.json'
          or key[-10:] == '.zmetadata'
          or key[-7:] == '.zarray'
          or key[-7:] == '.zgroup'
      ):
          # catch '.zarray' and 'attribute/.zarray'
          return None

      keyframe, page, chunkindex, offset, bytecount = self._parse_key(key)

      if page is None or offset == 0 or bytecount == 0:
          return None

      fh = page.parent.filehandle

      if self._chunkmode:
          if offset is not None:
              # contiguous image data in page or series
              # create virtual frame instead of loading page from file
              assert bytecount is not None
              page = TiffFrame(
                  page.parent,
                  index=0,
                  keyframe=keyframe,
                  dataoffsets=(offset,),
                  databytecounts=(bytecount,),
              )
          # TODO: use asyncio.to_thread ?
          self._filecache.open(fh)
          try:
              chunk = page.asarray(
                  lock=self._filecache.lock,
                  maxworkers=self._maxworkers,
                  buffersize=self._buffersize,
              )
          finally:
              # always balance open(), also when reading or decoding fails
              self._filecache.close(fh)
          if self._transform is not None:
              chunk = self._transform(chunk)
          return prototype.buffer(chunk.reshape(-1).view('B'))

      assert offset is not None
      assert bytecount is not None
      chunk_bytes = self._filecache.read(fh, offset, bytecount)

      decodeargs: dict[str, Any] = {'_fullsize': True}
      if page.jpegtables is not None:
          decodeargs['jpegtables'] = page.jpegtables
      if keyframe.jpegheader is not None:
          decodeargs['jpegheader'] = keyframe.jpegheader

      assert chunkindex is not None
      keyframe.decode  # noqa: B018 - cache decode function
      if self._maxworkers > 1:
          decoded = await asyncio.to_thread(
              keyframe.decode, chunk_bytes, chunkindex, **decodeargs
          )
      else:
          decoded = keyframe.decode(chunk_bytes, chunkindex, **decodeargs)
      chunk = decoded[0]  # type: ignore[assignment]
      del decoded
      assert chunk is not None
      if self._transform is not None:
          chunk = self._transform(chunk)

      # chunk mode returned above, hence chunks are always strips or tiles
      expected = product(keyframe.chunks)
      if chunk.size != expected:
          raise RuntimeError(f'{chunk.size} != {expected}')
      return prototype.buffer(chunk.reshape(-1).view('B'))
  async def exists(self, key: str) -> bool:
      """Return whether key exists in store.

      Keys of the in-memory store always exist. Other keys are parsed
      as chunk indices; keys that cannot be parsed do not exist.

      """
      # print(f'exists({key=})')
      if key in self._store:
          return True
      assert isinstance(key, str)

      try:
          parsed = self._parse_key(key)
      except (KeyError, IndexError):
          return False
      page, offset, bytecount = parsed[1], parsed[3], parsed[4]

      if self._chunkmode and offset is None:
          # whole page is the chunk; no offset is reported by _parse_key
          return True
      if page is None or offset is None or bytecount is None:
          return False
      return offset > 0 and bytecount > 0
  async def set(self, key: str, value: Buffer) -> None:
      """Store (key, value) pair.

      Writes to metadata items and to chunks without a page, offset,
      or byte count are silently ignored. Values longer than the
      chunk's byte count are truncated.

      Raises:
          PermissionError:
              If store is read-only.

      """
      if self._read_only:
          raise PermissionError('ZarrTiffStore is read-only')

      # catch '.zarray' and 'attribute/.zarray'
      if (
          key in self._store
          or key == 'zarr.json'
          or key.endswith(('.zmetadata', '.zarray', '.zgroup'))
      ):
          return

      _, page, _, offset, bytecount = self._parse_key(key)
      if page is None or not offset or not bytecount:
          # None and 0 both mark chunks that cannot be written
          return

      payload = value.to_bytes()[:bytecount]
      self._filecache.write(page.parent.filehandle, offset, payload)
  def _parse_key(self, key: str, /) -> tuple[
      TiffPage,
      TiffPage | TiffFrame | None,
      int | None,
      int | None,
      int | None,
  ]:
      """Return keyframe, page, index, offset, and bytecount from key.

      For multiscales stores, the key is expected to be of form
      'level/chunkindex'. Offset and bytecount are 0 if the page or
      chunk is missing, and None if the chunk is a whole page that is
      not stored contiguously in the series.

      Raise KeyError if key is not valid.

      """
      if not self._multiscales:
          series = self._data[0]
      else:
          try:
              level, key = key.split('/')
              series = self._data[int(level)]
          except (ValueError, IndexError) as exc:
              raise KeyError(key) from exc

      keyframe = series.keyframe
      pageindex, chunkindex = self._indices(key, series)

      if series.dataoffset is not None:
          # contiguous or truncated
          page = series[0]
          if page is None or page.dtype is None or page.keyframe is None:
              return keyframe, None, chunkindex, 0, 0
          pagesize = page.size * page.dtype.itemsize
          offset = pageindex * pagesize
          try:
              offset += page.dataoffsets[chunkindex]
          except IndexError as exc:
              raise KeyError(key) from exc
          if self._chunkmode:
              return page.keyframe, page, chunkindex, offset, pagesize
      else:
          with self._filecache.lock:
              page = series[pageindex]
          if self._chunkmode:
              # chunk is whole page: no strile index, offset, or bytecount
              if page is None or page.keyframe is None:
                  return keyframe, None, None, 0, 0
              return page.keyframe, page, None, None, None
          if page is None or page.keyframe is None:
              return keyframe, None, chunkindex, 0, 0
          try:
              offset = page.dataoffsets[chunkindex]
          except IndexError:
              # raise KeyError(key) from exc
              # issue #249: Philips may be missing last row of tiles
              return page.keyframe, page, chunkindex, 0, 0

      try:
          bytecount = page.databytecounts[chunkindex]
      except IndexError as exc:
          raise KeyError(key) from exc
      return page.keyframe, page, chunkindex, offset, bytecount
  def _indices(self, key: str, series: TiffPageSeries, /) -> tuple[int, int]:
      """Return page and strile indices from Zarr chunk index.

      Parameters:
          key:
              Chunk index: integers separated by '.', one per dimension
              of the series shape.
          series:
              Series the chunk index refers to.

      Returns:
          Flat index of the frame in the series and flat index of the
          strile in the frame.

      Raises:
          KeyError:
              If key is not a sequence of integers, has the wrong number
              of dimensions, or is out of bounds.
          RuntimeError:
              If the series shape cannot be matched to the keyframe.

      """
      keyframe = series.keyframe
      shape = series.get_shape(self._squeeze)
      try:
          indices = [int(i) for i in key.split('.')]
      except ValueError as exc:
          raise KeyError(key) from exc
      if len(indices) != len(shape):
          # explicit check: asserts are stripped when running with -O
          raise KeyError(key)
      if self._chunkmode:
          chunked = (1,) * len(keyframe.shape)
      else:
          chunked = keyframe.chunked
      p = 1
      for index, s in enumerate(shape[::-1]):
          p *= s
          if p == keyframe.size:
              i = len(indices) - index - 1
              frames_indices = indices[:i]
              strile_indices = indices[i:]
              frames_chunked = shape[:i]
              strile_chunked = list(shape[i:])  # updated later
              break
      else:
          raise RuntimeError
      if len(strile_chunked) == len(keyframe.shape):
          strile_chunked = list(chunked)
      else:
          # get strile_chunked including singleton dimensions
          i = len(strile_indices) - 1
          j = len(keyframe.shape) - 1
          while True:
              if strile_chunked[i] == keyframe.shape[j]:
                  strile_chunked[i] = chunked[j]
                  i -= 1
                  j -= 1
              elif strile_chunked[i] == 1:
                  i -= 1
              else:
                  raise RuntimeError('shape does not match page shape')
              if i < 0 or j < 0:
                  break
          assert product(strile_chunked) == product(chunked)
      try:
          if len(frames_indices) > 0:
              frameindex = int(
                  numpy.ravel_multi_index(frames_indices, frames_chunked)
              )
          else:
              frameindex = 0
          if len(strile_indices) > 0:
              strileindex = int(
                  numpy.ravel_multi_index(strile_indices, strile_chunked)
              )
          else:
              strileindex = 0
      except ValueError as exc:
          # ravel_multi_index rejects out-of-bounds coordinates
          raise KeyError(key) from exc
      return frameindex, strileindex
  class ZarrFileSequenceStore(ZarrStore):
      """Zarr 3 store interface to image array in FileSequence.

      Parameters:
          filesequence:
              FileSequence instance to wrap as Zarr store.
              Files in containers are not supported.
          fillvalue:
              Value to use for missing chunks. The default is 0.
          chunkmode:
              Currently only one chunk per file is supported.
          chunkshape:
              Shape of chunk in each file.
              Must match ``FileSequence.imread(file, **imreadargs).shape``.
          chunkdtype:
              Data type of chunk in each file.
              Must match ``FileSequence.imread(file, **imreadargs).dtype``.
          axestiled:
              Axes to be tiled. Map stacked sequence axis to chunk axis.
          zattrs:
              Additional attributes to store in `.zattrs`.
          ioworkers:
              If not 1, asynchronously run `imread` function in separate
              thread. If enabled, internal threading for the `imread`
              function should be disabled.
          read_only:
              Passed to :py:class:`zarr.abc.store.Store`.
          imreadargs:
              Arguments passed to :py:attr:`FileSequence.imread`.
          **kwargs:
              Arguments passed to :py:attr:`FileSequence.imread` in
              addition to `imreadargs`.

      Notes:
          If `chunkshape` or `chunkdtype` are *None* (default), their
          values are determined by reading the first file with
          ``FileSequence.imread(arg.files[0], **imreadargs)``.

      """

      _imread: Callable[..., NDArray[Any]]
      _lookup: dict[tuple[int, ...], str]
      _chunks: tuple[int, ...]
      _dtype: numpy.dtype[Any]
      _tiled: TiledSequence
      _commonpath: str
      _ioworkers: int
      _kwargs: dict[str, Any]

      def __init__(
          self,
          filesequence: FileSequence,
          /,
          *,
          fillvalue: float | None = None,
          chunkmode: CHUNKMODE | int | str | None = None,
          chunkshape: Sequence[int] | None = None,
          chunkdtype: DTypeLike | None = None,
          axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None,
          zattrs: dict[str, Any] | None = None,
          ioworkers: int | None = 1,
          imreadargs: dict[str, Any] | None = None,
          read_only: bool = True,
          **kwargs: Any,
      ) -> None:
          super().__init__(
              fillvalue=fillvalue, chunkmode=chunkmode, read_only=read_only
          )

          if self._chunkmode not in {0, 3}:
              raise ValueError(f'invalid chunkmode {self._chunkmode!r}')

          if not isinstance(filesequence, FileSequence):
              raise TypeError('not a FileSequence')

          if filesequence._container:
              raise NotImplementedError('cannot open container as Zarr store')

          # TODO: deprecate kwargs?
          if imreadargs is not None:
              kwargs |= imreadargs

          self._ioworkers = 1 if ioworkers is None else ioworkers
          self._kwargs = kwargs
          self._imread = filesequence.imread
          self._commonpath = filesequence.commonpath()

          if chunkshape is None or chunkdtype is None:
              # read first file to determine shape and dtype of chunks
              chunk = filesequence.imread(filesequence[0], **kwargs)
              self._chunks = chunk.shape
              self._dtype = chunk.dtype
          else:
              self._chunks = tuple(chunkshape)
              self._dtype = numpy.dtype(chunkdtype)

          self._tiled = TiledSequence(
              filesequence.shape, self._chunks, axestiled=axestiled
          )
          # map chunk indices to file names
          self._lookup = dict(
              zip(
                  self._tiled.indices(filesequence.indices),
                  filesequence,
                  strict=True,
              )
          )

          zattrs = {} if zattrs is None else dict(zattrs)
          # TODO: add _ARRAY_DIMENSIONS to ZarrFileSequenceStore
          # if '_ARRAY_DIMENSIONS' not in zattrs:
          #     zattrs['_ARRAY_DIMENSIONS'] = list(...)

          self._store['.zattrs'] = _json_dumps(zattrs)
          self._store['.zarray'] = _json_dumps(
              {
                  'zarr_format': 2,
                  'shape': self._tiled.shape,
                  'chunks': self._tiled.chunks,
                  'dtype': _dtype_str(self._dtype),
                  'compressor': None,
                  'fill_value': _json_value(fillvalue, self._dtype),
                  'order': 'C',
                  'filters': None,
              }
          )

      @property
      def imread(self) -> Callable[..., NDArray[Any]]:
          """Function to read image array from single file."""
          return self._imread

      async def exists(self, key: str) -> bool:
          """Return whether key exists in store."""
          # print(f'exists({key=})')
          if key in self._store:
              return True
          assert isinstance(key, str)
          try:
              indices = tuple(int(i) for i in key.split('.'))
          except ValueError:
              # not a chunk index
              return False
          return indices in self._lookup

      async def get(
          self,
          key: str,
          prototype: BufferPrototype,
          byte_range: ByteRequest | None = None,
      ) -> Buffer | None:
          """Return value associated with key.

          Return None if key is neither a stored item nor the index of
          a chunk that maps to a file.

          Raises:
              NotImplementedError:
                  If `byte_range` is not None.

          """
          if byte_range is not None:
              raise NotImplementedError(f'{byte_range=!r} not supported')

          if key in self._store:
              return prototype.buffer.from_bytes(self._store[key])

          if (
              key == 'zarr.json'
              or key[-10:] == '.zmetadata'
              or key[-7:] == '.zarray'
              or key[-7:] == '.zgroup'
          ):
              # catch '.zarray' and 'attribute/.zarray'
              return None

          try:
              indices = tuple(int(i) for i in key.split('.'))
          except ValueError:
              # not a chunk index: key is missing, consistent with exists()
              return None
          filename = self._lookup.get(indices, None)
          if filename is None:
              return None
          if self._ioworkers != 1:
              chunk = await asyncio.to_thread(
                  self._imread, filename, **self._kwargs
              )
          else:
              chunk = self._imread(filename, **self._kwargs)
          return prototype.buffer(chunk.reshape(-1).view('B'))

      def write_fsspec(
          self,
          jsonfile: str | os.PathLike[Any] | TextIO,
          /,
          url: str | None,
          *,
          quote: bool | None = None,
          groupname: str | None = None,
          templatename: str | None = None,
          codec_id: str | None = None,
          version: int | None = None,
          _append: bool = False,
          _close: bool = True,
      ) -> None:
          """Write fsspec ReferenceFileSystem as JSON to file.

          Parameters:
              jsonfile:
                  Name or open file handle of output JSON file.
                  Files opened by this function are always closed,
                  open file handles passed in are left open.
              url:
                  Remote location of TIFF file(s) without file name(s).
              quote:
                  Quote file names, that is, replace ' ' with '%20'.
                  The default is True.
              groupname:
                  Zarr group name.
              templatename:
                  Version 1 URL template name. The default is 'u'.
              codec_id:
                  Name of Numcodecs codec to decode files or chunks.
              version:
                  Version of fsspec file to write. The default is 0.
              _append, _close:
                  Experimental API.

          Raises:
              ValueError:
                  If `codec_id` cannot be determined or appending to
                  version 1 files is requested.

          References:
              - `fsspec ReferenceFileSystem format
                <https://github.com/fsspec/kerchunk>`_

          """
          from urllib.parse import quote as quote_

          kwargs = self._kwargs.copy()

          if codec_id is not None:
              pass
          elif self._imread is imread:
              codec_id = 'tifffile'
          elif 'imagecodecs' in self._imread.__module__:
              if (
                  self._imread.__name__ != 'imread'
                  or 'codec' not in self._kwargs
              ):
                  raise ValueError('cannot determine codec_id')
              codec = kwargs.pop('codec')
              if isinstance(codec, (list, tuple)):
                  codec = codec[0]
              if callable(codec):
                  codec = codec.__name__.split('_')[0]
              codec_id = {
                  'apng': 'imagecodecs_apng',
                  'avif': 'imagecodecs_avif',
                  'gif': 'imagecodecs_gif',
                  'heif': 'imagecodecs_heif',
                  'jpeg': 'imagecodecs_jpeg',
                  'jpeg8': 'imagecodecs_jpeg',
                  'jpeg12': 'imagecodecs_jpeg',
                  'jpeg2k': 'imagecodecs_jpeg2k',
                  'jpegls': 'imagecodecs_jpegls',
                  'jpegxl': 'imagecodecs_jpegxl',
                  'jpegxr': 'imagecodecs_jpegxr',
                  'ljpeg': 'imagecodecs_ljpeg',
                  'lerc': 'imagecodecs_lerc',
                  # 'npy': 'imagecodecs_npy',
                  'png': 'imagecodecs_png',
                  'qoi': 'imagecodecs_qoi',
                  'tiff': 'imagecodecs_tiff',
                  'webp': 'imagecodecs_webp',
                  'zfp': 'imagecodecs_zfp',
              }[codec]
          else:
              # TODO: choose codec from filename
              raise ValueError('cannot determine codec_id')

          if url is None:
              url = ''
          elif url and url[-1] != '/':
              url += '/'

          if groupname is None:
              groupname = ''
          elif groupname and groupname[-1] != '/':
              groupname += '/'

          refs: dict[str, Any] = {}
          if version == 1:
              if _append:
                  raise ValueError('cannot append to version 1 files')
              if templatename is None:
                  templatename = 'u'
              refs['version'] = 1
              refs['templates'] = {templatename: url}
              refs['gen'] = []
              refs['refs'] = refzarr = {}
              url = f'{{{{{templatename}}}}}'
          else:
              refzarr = refs

          if groupname and not _append:
              refzarr['.zgroup'] = _json_dumps({'zarr_format': 2}).decode()

          for key, value in self._store.items():
              if '.zarray' in key:
                  value = json.loads(value)
                  # TODO: make kwargs serializable
                  value['compressor'] = {'id': codec_id, **kwargs}
                  value = _json_dumps(value)
              refzarr[groupname + key] = value.decode()

          owns_file = not hasattr(jsonfile, 'write')
          fh: TextIO
          if owns_file:
              fh = open(jsonfile, 'w', encoding='utf-8')  # noqa: SIM115
          else:
              fh = jsonfile  # type: ignore[assignment]

          try:
              # write metadata without closing braces; references follow
              if version == 1:
                  fh.write(
                      json.dumps(refs, indent=1).rsplit('}"', 1)[0] + '}"'
                  )
                  indent = ' '
              elif _append:
                  fh.write(',\n')
                  fh.write(json.dumps(refs, indent=1)[2:-2])
                  indent = ' '
              else:
                  fh.write(json.dumps(refs, indent=1)[:-2])
                  indent = ' '

              prefix = len(self._commonpath)

              for key in self._store:
                  if '.zarray' not in key:
                      continue
                  # keys are unique, hence sorting items orders by index
                  for index, fname in sorted(self._lookup.items()):
                      filename = fname[prefix:].replace('\\', '/')
                      if quote is None or quote:
                          filename = quote_(filename)
                      if filename[0] == '/':
                          filename = filename[1:]
                      indexstr = '.'.join(str(i) for i in index)
                      fh.write(
                          f',\n{indent}"{groupname}{indexstr}": '
                          f'["{url}{filename}"]'
                      )

              if version == 1:
                  fh.write('\n }\n}')
              elif _close:
                  fh.write('\n}')
          finally:
              # do not leak file opened here if writing fails
              if owns_file:
                  fh.close()
  def zarr_selection(
      store: ZarrStore,
      selection: BasicSelection,
      /,
      *,
      groupindex: str | None = None,
      close: bool = True,
      out: OutputType = None,
  ) -> NDArray[Any]:
      """Return selection from Zarr store.

      Parameters:
          store:
              ZarrStore instance to read selection from.
          selection:
              Subset of image to be extracted and returned.
              Refer to the Zarr documentation for valid selections.
          groupindex:
              Index of array if store is Zarr group.
              The default is '0'.
          close:
              Close store before returning, also if opening the store
              or reading the selection fails.
          out:
              Specifies how image array is returned.
              By default, create a new array.
              If a *numpy.ndarray*, a writable array to which the images
              are copied.
              If *'memmap'*, create a memory-mapped array in a temporary
              file.
              If a *string* or *open file*, the file used to create a
              memory-mapped array.

      """
      import zarr
      from zarr.core.indexing import BasicIndexer

      zarray: zarr.Array[Any]

      try:
          # open inside try block such that store is closed on failure
          z = zarr.open(store, mode='r', zarr_format=2)
          if isinstance(z, zarr.Group):
              if groupindex is None:
                  groupindex = '0'
              zarray = z[groupindex]  # type: ignore[assignment]
          else:
              zarray = z
          if out is not None:
              # allocate output with the shape of the selection
              shape = BasicIndexer(
                  selection,
                  shape=zarray.shape,
                  chunk_grid=RegularChunkGrid(chunk_shape=zarray.chunks),
              ).shape
              ndbuffer = NDBuffer.from_numpy_array(
                  create_output(out, shape, zarray.dtype)
              )
          else:
              ndbuffer = None
          result = zarray.get_basic_selection(selection, out=ndbuffer)
          del zarray
      finally:
          if close:
              store.close()
      return result  # type: ignore[return-value]
  1332. def _empty_chunk(
  1333. shape: tuple[int, ...],
  1334. dtype: DTypeLike | None,
  1335. fillvalue: float | None,
  1336. /,
  1337. ) -> NDArray[Any]:
  1338. """Return empty chunk."""
  1339. if fillvalue is None or fillvalue == 0:
  1340. # return bytes(product(shape) * dtype.itemsize)
  1341. return numpy.zeros(shape, dtype)
  1342. chunk = numpy.empty(shape, dtype)
  1343. chunk[:] = fillvalue
  1344. return chunk # .tobytes()
  1345. def _dtype_str(dtype: numpy.dtype[Any], /) -> str:
  1346. """Return dtype as string with native byte order."""
  1347. if dtype.itemsize == 1:
  1348. byteorder = '|'
  1349. else:
  1350. byteorder = {'big': '>', 'little': '<'}[sys.byteorder]
  1351. return byteorder + dtype.str[1:]
  1352. def _json_dumps(obj: Any, /) -> bytes:
  1353. """Serialize object to JSON formatted string."""
  1354. return json.dumps(
  1355. obj,
  1356. indent=1,
  1357. sort_keys=True,
  1358. ensure_ascii=True,
  1359. separators=(',', ': '),
  1360. ).encode('ascii')
  1361. def _json_value(value: Any, dtype: numpy.dtype[Any], /) -> Any:
  1362. """Return value which is serializable to JSON."""
  1363. if value is None:
  1364. return value
  1365. if dtype.kind == 'b':
  1366. return bool(value)
  1367. if dtype.kind in 'ui':
  1368. return int(value)
  1369. if dtype.kind == 'f':
  1370. if numpy.isnan(value):
  1371. return 'NaN'
  1372. if numpy.isposinf(value):
  1373. return 'Infinity'
  1374. if numpy.isneginf(value):
  1375. return '-Infinity'
  1376. return float(value)
  1377. if dtype.kind == 'c':
  1378. value = numpy.array(value, dtype)
  1379. return (
  1380. _json_value(value.real, dtype.type().real.dtype),
  1381. _json_value(value.imag, dtype.type().imag.dtype),
  1382. )
  1383. return value
  1384. def _ndindex(
  1385. shape: tuple[int, ...], chunks: tuple[int, ...], /
  1386. ) -> Iterator[str]:
  1387. """Return iterator over all chunk index strings."""
  1388. assert len(shape) == len(chunks)
  1389. chunked = tuple(
  1390. i // j + (1 if i % j else 0)
  1391. for i, j in zip(shape, chunks, strict=True)
  1392. )
  1393. for indices in numpy.ndindex(chunked):
  1394. yield '.'.join(str(index) for index in indices)
  1395. def _is_writable(keyframe: TiffPage) -> bool:
  1396. """Return True if chunks are writable."""
  1397. return (
  1398. keyframe.compression == 1
  1399. and keyframe.fillorder == 1
  1400. and keyframe.sampleformat in {1, 2, 3, 6}
  1401. and keyframe.bitspersample in {8, 16, 32, 64, 128}
  1402. # and (
  1403. # keyframe.rowsperstrip == 0
  1404. # or keyframe.imagelength % keyframe.rowsperstrip == 0
  1405. # )
  1406. )
  1407. def _chunks(
  1408. chunks: tuple[int, ...],
  1409. shape: tuple[int, ...],
  1410. shaped: tuple[int, int, int, int, int],
  1411. /,
  1412. ) -> tuple[int, ...]:
  1413. """Return chunks with same length as shape."""
  1414. ndim = len(shape)
  1415. if ndim == 0:
  1416. return () # empty array
  1417. if 0 in shape:
  1418. return (1,) * ndim
  1419. d = 0 if shaped[1] == 1 else 1
  1420. i = min(ndim, 3 + d)
  1421. n = len(chunks)
  1422. if (
  1423. n == 2 + d
  1424. and i != 2 + d
  1425. and shape[-1] == 1
  1426. and shape[-i:] == shaped[-i:]
  1427. ):
  1428. # planarconfig=contig with one sample
  1429. chunks = (*chunks, 1)
  1430. if ndim < len(chunks):
  1431. # remove leading dimensions of size 1 from chunks
  1432. i = 0
  1433. for size in chunks:
  1434. if size > 1:
  1435. break
  1436. i += 1
  1437. chunks = chunks[i:]
  1438. if ndim < len(chunks):
  1439. raise ValueError(f'{shape=!r} is shorter than {chunks=!r}')
  1440. # prepend size 1 dimensions to chunks to match length of shape
  1441. return tuple([1] * (ndim - len(chunks)) + list(chunks))