_dicom.py 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931
  1. # -*- coding: utf-8 -*-
  2. # imageio is distributed under the terms of the (new) BSD License.
  3. """Plugin for reading DICOM files."""
  4. # todo: Use pydicom:
  5. # * Note: is not py3k ready yet
  6. # * Allow reading the full meta info
  7. # I think we can more or less replace the SimpleDicomReader with a
  8. # pydicom.Dataset For series, only ned to read the full info from one
  9. # file: speed still high
  10. # * Perhaps allow writing?
  11. import sys
  12. import os
  13. import struct
  14. import logging
  15. import numpy as np
  16. logger = logging.getLogger(__name__)
  17. # Determine endianity of system
  18. sys_is_little_endian = sys.byteorder == "little"
  19. # Define a dictionary that contains the tags that we would like to know
  20. MINIDICT = {
  21. (0x7FE0, 0x0010): ("PixelData", "OB"),
  22. # Date and time
  23. (0x0008, 0x0020): ("StudyDate", "DA"),
  24. (0x0008, 0x0021): ("SeriesDate", "DA"),
  25. (0x0008, 0x0022): ("AcquisitionDate", "DA"),
  26. (0x0008, 0x0023): ("ContentDate", "DA"),
  27. (0x0008, 0x0030): ("StudyTime", "TM"),
  28. (0x0008, 0x0031): ("SeriesTime", "TM"),
  29. (0x0008, 0x0032): ("AcquisitionTime", "TM"),
  30. (0x0008, 0x0033): ("ContentTime", "TM"),
  31. # With what, where, by whom?
  32. (0x0008, 0x0060): ("Modality", "CS"),
  33. (0x0008, 0x0070): ("Manufacturer", "LO"),
  34. (0x0008, 0x0080): ("InstitutionName", "LO"),
  35. # Descriptions
  36. (0x0008, 0x1030): ("StudyDescription", "LO"),
  37. (0x0008, 0x103E): ("SeriesDescription", "LO"),
  38. # UID's
  39. (0x0008, 0x0016): ("SOPClassUID", "UI"),
  40. (0x0008, 0x0018): ("SOPInstanceUID", "UI"),
  41. (0x0020, 0x000D): ("StudyInstanceUID", "UI"),
  42. (0x0020, 0x000E): ("SeriesInstanceUID", "UI"),
  43. (0x0008, 0x0117): ("ContextUID", "UI"),
  44. # Numbers
  45. (0x0020, 0x0011): ("SeriesNumber", "IS"),
  46. (0x0020, 0x0012): ("AcquisitionNumber", "IS"),
  47. (0x0020, 0x0013): ("InstanceNumber", "IS"),
  48. (0x0020, 0x0014): ("IsotopeNumber", "IS"),
  49. (0x0020, 0x0015): ("PhaseNumber", "IS"),
  50. (0x0020, 0x0016): ("IntervalNumber", "IS"),
  51. (0x0020, 0x0017): ("TimeSlotNumber", "IS"),
  52. (0x0020, 0x0018): ("AngleNumber", "IS"),
  53. (0x0020, 0x0019): ("ItemNumber", "IS"),
  54. (0x0020, 0x0020): ("PatientOrientation", "CS"),
  55. (0x0020, 0x0030): ("ImagePosition", "CS"),
  56. (0x0020, 0x0032): ("ImagePositionPatient", "CS"),
  57. (0x0020, 0x0035): ("ImageOrientation", "CS"),
  58. (0x0020, 0x0037): ("ImageOrientationPatient", "CS"),
  59. # Patient information
  60. (0x0010, 0x0010): ("PatientName", "PN"),
  61. (0x0010, 0x0020): ("PatientID", "LO"),
  62. (0x0010, 0x0030): ("PatientBirthDate", "DA"),
  63. (0x0010, 0x0040): ("PatientSex", "CS"),
  64. (0x0010, 0x1010): ("PatientAge", "AS"),
  65. (0x0010, 0x1020): ("PatientSize", "DS"),
  66. (0x0010, 0x1030): ("PatientWeight", "DS"),
  67. # Image specific (required to construct numpy array)
  68. (0x0028, 0x0002): ("SamplesPerPixel", "US"),
  69. (0x0028, 0x0008): ("NumberOfFrames", "IS"),
  70. (0x0028, 0x0100): ("BitsAllocated", "US"),
  71. (0x0028, 0x0101): ("BitsStored", "US"),
  72. (0x0028, 0x0102): ("HighBit", "US"),
  73. (0x0028, 0x0103): ("PixelRepresentation", "US"),
  74. (0x0028, 0x0010): ("Rows", "US"),
  75. (0x0028, 0x0011): ("Columns", "US"),
  76. (0x0028, 0x1052): ("RescaleIntercept", "DS"),
  77. (0x0028, 0x1053): ("RescaleSlope", "DS"),
  78. # Image specific (for the user)
  79. (0x0028, 0x0030): ("PixelSpacing", "DS"),
  80. (0x0018, 0x0088): ("SliceSpacing", "DS"),
  81. }
  82. # Define some special tags:
  83. # See PS 3.5-2008 section 7.5 (p.40)
  84. ItemTag = (0xFFFE, 0xE000) # start of Sequence Item
  85. ItemDelimiterTag = (0xFFFE, 0xE00D) # end of Sequence Item
  86. SequenceDelimiterTag = (0xFFFE, 0xE0DD) # end of Sequence of undefined length
  87. # Define set of groups that we're interested in (so we can quickly skip others)
  88. GROUPS = set([key[0] for key in MINIDICT.keys()])
  89. VRS = set([val[1] for val in MINIDICT.values()])
  90. class NotADicomFile(Exception):
  91. pass
  92. class CompressedDicom(RuntimeError):
  93. pass
  94. class SimpleDicomReader(object):
  95. """
  96. This class provides reading of pixel data from DICOM files. It is
  97. focussed on getting the pixel data, not the meta info.
  98. To use, first create an instance of this class (giving it
  99. a file object or filename). Next use the info attribute to
  100. get a dict of the meta data. The loading of pixel data is
  101. deferred until get_numpy_array() is called.
  102. Comparison with Pydicom
  103. -----------------------
  104. This code focusses on getting the pixel data out, which allows some
  105. shortcuts, resulting in the code being much smaller.
  106. Since the processing of data elements is much cheaper (it skips a lot
  107. of tags), this code is about 3x faster than pydicom (except for the
  108. deflated DICOM files).
  109. This class does borrow some code (and ideas) from the pydicom
  110. project, and (to the best of our knowledge) has the same limitations
  111. as pydicom with regard to the type of files that it can handle.
  112. Limitations
  113. -----------
  114. For more advanced DICOM processing, please check out pydicom.
  115. * Only a predefined subset of data elements (meta information) is read.
  116. * This is a reader; it can not write DICOM files.
  117. * (just like pydicom) it can handle none of the compressed DICOM
  118. formats except for "Deflated Explicit VR Little Endian"
  119. (1.2.840.10008.1.2.1.99).
  120. """
  121. def __init__(self, file):
  122. # Open file if filename given
  123. if isinstance(file, str):
  124. self._filename = file
  125. self._file = open(file, "rb")
  126. else:
  127. self._filename = "<unknown file>"
  128. self._file = file
  129. # Init variable to store position and size of pixel data
  130. self._pixel_data_loc = None
  131. # The meta header is always explicit and little endian
  132. self.is_implicit_VR = False
  133. self.is_little_endian = True
  134. self._unpackPrefix = "<"
  135. # Dict to store data elements of interest in
  136. self._info = {}
  137. # VR Conversion
  138. self._converters = {
  139. # Numbers
  140. "US": lambda x: self._unpack("H", x),
  141. "UL": lambda x: self._unpack("L", x),
  142. # Numbers encoded as strings
  143. "DS": lambda x: self._splitValues(x, float, "\\"),
  144. "IS": lambda x: self._splitValues(x, int, "\\"),
  145. # strings
  146. "AS": lambda x: x.decode("ascii", "ignore").strip("\x00"),
  147. "DA": lambda x: x.decode("ascii", "ignore").strip("\x00"),
  148. "TM": lambda x: x.decode("ascii", "ignore").strip("\x00"),
  149. "UI": lambda x: x.decode("ascii", "ignore").strip("\x00"),
  150. "LO": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
  151. "CS": lambda x: self._splitValues(x, float, "\\"),
  152. "PN": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
  153. }
  154. # Initiate reading
  155. self._read()
  156. @property
  157. def info(self):
  158. return self._info
  159. def _splitValues(self, x, type, splitter):
  160. s = x.decode("ascii").strip("\x00")
  161. try:
  162. if splitter in s:
  163. return tuple([type(v) for v in s.split(splitter) if v.strip()])
  164. else:
  165. return type(s)
  166. except ValueError:
  167. return s
  168. def _unpack(self, fmt, value):
  169. return struct.unpack(self._unpackPrefix + fmt, value)[0]
  170. # Really only so we need minimal changes to _pixel_data_numpy
  171. def __iter__(self):
  172. return iter(self._info.keys())
  173. def __getattr__(self, key):
  174. info = object.__getattribute__(self, "_info")
  175. if key in info:
  176. return info[key]
  177. return object.__getattribute__(self, key) # pragma: no cover
  178. def _read(self):
  179. f = self._file
  180. # Check prefix after peamble
  181. f.seek(128)
  182. if f.read(4) != b"DICM":
  183. raise NotADicomFile("Not a valid DICOM file.")
  184. # Read
  185. self._read_header()
  186. self._read_data_elements()
  187. self._get_shape_and_sampling()
  188. # Close if done, reopen if necessary to read pixel data
  189. if os.path.isfile(self._filename):
  190. self._file.close()
  191. self._file = None
  192. def _readDataElement(self):
  193. f = self._file
  194. # Get group and element
  195. group = self._unpack("H", f.read(2))
  196. element = self._unpack("H", f.read(2))
  197. # Get value length
  198. if self.is_implicit_VR:
  199. vl = self._unpack("I", f.read(4))
  200. else:
  201. vr = f.read(2)
  202. if vr in (b"OB", b"OW", b"SQ", b"UN"):
  203. reserved = f.read(2) # noqa
  204. vl = self._unpack("I", f.read(4))
  205. else:
  206. vl = self._unpack("H", f.read(2))
  207. # Get value
  208. if group == 0x7FE0 and element == 0x0010:
  209. here = f.tell()
  210. self._pixel_data_loc = here, vl
  211. f.seek(here + vl)
  212. return group, element, b"Deferred loading of pixel data"
  213. else:
  214. if vl == 0xFFFFFFFF:
  215. value = self._read_undefined_length_value()
  216. else:
  217. value = f.read(vl)
  218. return group, element, value
  219. def _read_undefined_length_value(self, read_size=128):
  220. """Copied (in compacted form) from PyDicom
  221. Copyright Darcy Mason.
  222. """
  223. fp = self._file
  224. # data_start = fp.tell()
  225. search_rewind = 3
  226. bytes_to_find = struct.pack(
  227. self._unpackPrefix + "HH", SequenceDelimiterTag[0], SequenceDelimiterTag[1]
  228. )
  229. found = False
  230. value_chunks = []
  231. while not found:
  232. chunk_start = fp.tell()
  233. bytes_read = fp.read(read_size)
  234. if len(bytes_read) < read_size:
  235. # try again,
  236. # if still don't get required amount, this is last block
  237. new_bytes = fp.read(read_size - len(bytes_read))
  238. bytes_read += new_bytes
  239. if len(bytes_read) < read_size:
  240. raise EOFError(
  241. "End of file reached before sequence " "delimiter found."
  242. )
  243. index = bytes_read.find(bytes_to_find)
  244. if index != -1:
  245. found = True
  246. value_chunks.append(bytes_read[:index])
  247. fp.seek(chunk_start + index + 4) # rewind to end of delimiter
  248. length = fp.read(4)
  249. if length != b"\0\0\0\0":
  250. logger.warning(
  251. "Expected 4 zero bytes after undefined length " "delimiter"
  252. )
  253. else:
  254. fp.seek(fp.tell() - search_rewind) # rewind a bit
  255. # accumulate the bytes read (not including the rewind)
  256. value_chunks.append(bytes_read[:-search_rewind])
  257. # if get here then have found the byte string
  258. return b"".join(value_chunks)
  259. def _read_header(self):
  260. f = self._file
  261. TransferSyntaxUID = None
  262. # Read all elements, store transferSyntax when we encounter it
  263. try:
  264. while True:
  265. fp_save = f.tell()
  266. # Get element
  267. group, element, value = self._readDataElement()
  268. if group == 0x02:
  269. if group == 0x02 and element == 0x10:
  270. TransferSyntaxUID = value.decode("ascii").strip("\x00")
  271. else:
  272. # No more group 2: rewind and break
  273. # (don't trust group length)
  274. f.seek(fp_save)
  275. break
  276. except (EOFError, struct.error): # pragma: no cover
  277. raise RuntimeError("End of file reached while still in header.")
  278. # Handle transfer syntax
  279. self._info["TransferSyntaxUID"] = TransferSyntaxUID
  280. #
  281. if TransferSyntaxUID is None:
  282. # Assume ExplicitVRLittleEndian
  283. is_implicit_VR, is_little_endian = False, True
  284. elif TransferSyntaxUID == "1.2.840.10008.1.2.1":
  285. # ExplicitVRLittleEndian
  286. is_implicit_VR, is_little_endian = False, True
  287. elif TransferSyntaxUID == "1.2.840.10008.1.2.2":
  288. # ExplicitVRBigEndian
  289. is_implicit_VR, is_little_endian = False, False
  290. elif TransferSyntaxUID == "1.2.840.10008.1.2":
  291. # implicit VR little endian
  292. is_implicit_VR, is_little_endian = True, True
  293. elif TransferSyntaxUID == "1.2.840.10008.1.2.1.99":
  294. # DeflatedExplicitVRLittleEndian:
  295. is_implicit_VR, is_little_endian = False, True
  296. self._inflate()
  297. else:
  298. # http://www.dicomlibrary.com/dicom/transfer-syntax/
  299. t, extra_info = TransferSyntaxUID, ""
  300. if "1.2.840.10008.1.2.4.50" <= t < "1.2.840.10008.1.2.4.99":
  301. extra_info = " (JPEG)"
  302. if "1.2.840.10008.1.2.4.90" <= t < "1.2.840.10008.1.2.4.99":
  303. extra_info = " (JPEG 2000)"
  304. if t == "1.2.840.10008.1.2.5":
  305. extra_info = " (RLE)"
  306. if t == "1.2.840.10008.1.2.6.1":
  307. extra_info = " (RFC 2557)"
  308. raise CompressedDicom(
  309. "The dicom reader can only read files with "
  310. "uncompressed image data - not %r%s. You "
  311. "can try using dcmtk or gdcm to convert the "
  312. "image." % (t, extra_info)
  313. )
  314. # From hereon, use implicit/explicit big/little endian
  315. self.is_implicit_VR = is_implicit_VR
  316. self.is_little_endian = is_little_endian
  317. self._unpackPrefix = "><"[is_little_endian]
  318. def _read_data_elements(self):
  319. info = self._info
  320. try:
  321. while True:
  322. # Get element
  323. group, element, value = self._readDataElement()
  324. # Is it a group we are interested in?
  325. if group in GROUPS:
  326. key = (group, element)
  327. name, vr = MINIDICT.get(key, (None, None))
  328. # Is it an element we are interested in?
  329. if name:
  330. # Store value
  331. converter = self._converters.get(vr, lambda x: x)
  332. info[name] = converter(value)
  333. except (EOFError, struct.error):
  334. pass # end of file ...
  335. def get_numpy_array(self):
  336. """Get numpy arra for this DICOM file, with the correct shape,
  337. and pixel values scaled appropriately.
  338. """
  339. # Is there pixel data at all?
  340. if "PixelData" not in self:
  341. raise TypeError("No pixel data found in this dataset.")
  342. # Load it now if it was not already loaded
  343. if self._pixel_data_loc and len(self.PixelData) < 100:
  344. # Reopen file?
  345. close_file = False
  346. if self._file is None:
  347. close_file = True
  348. self._file = open(self._filename, "rb")
  349. # Read data
  350. self._file.seek(self._pixel_data_loc[0])
  351. if self._pixel_data_loc[1] == 0xFFFFFFFF:
  352. value = self._read_undefined_length_value()
  353. else:
  354. value = self._file.read(self._pixel_data_loc[1])
  355. # Close file
  356. if close_file:
  357. self._file.close()
  358. self._file = None
  359. # Overwrite
  360. self._info["PixelData"] = value
  361. # Get data
  362. data = self._pixel_data_numpy()
  363. data = self._apply_slope_and_offset(data)
  364. # Remove data again to preserve memory
  365. # Note that the data for the original file is loaded twice ...
  366. self._info["PixelData"] = (
  367. b"Data converted to numpy array, " + b"raw data removed to preserve memory"
  368. )
  369. return data
  370. def _get_shape_and_sampling(self):
  371. """Get shape and sampling without actuall using the pixel data.
  372. In this way, the user can get an idea what's inside without having
  373. to load it.
  374. """
  375. # Get shape (in the same way that pydicom does)
  376. if "NumberOfFrames" in self and self.NumberOfFrames > 1:
  377. if self.SamplesPerPixel > 1:
  378. shape = (
  379. self.SamplesPerPixel,
  380. self.NumberOfFrames,
  381. self.Rows,
  382. self.Columns,
  383. )
  384. else:
  385. shape = self.NumberOfFrames, self.Rows, self.Columns
  386. elif "SamplesPerPixel" in self:
  387. if self.SamplesPerPixel > 1:
  388. if self.BitsAllocated == 8:
  389. shape = self.SamplesPerPixel, self.Rows, self.Columns
  390. else:
  391. raise NotImplementedError(
  392. "DICOM plugin only handles "
  393. "SamplesPerPixel > 1 if Bits "
  394. "Allocated = 8"
  395. )
  396. else:
  397. shape = self.Rows, self.Columns
  398. else:
  399. raise RuntimeError(
  400. "DICOM file has no SamplesPerPixel " "(perhaps this is a report?)"
  401. )
  402. # Try getting sampling between pixels
  403. if "PixelSpacing" in self:
  404. sampling = float(self.PixelSpacing[0]), float(self.PixelSpacing[1])
  405. else:
  406. sampling = 1.0, 1.0
  407. if "SliceSpacing" in self:
  408. sampling = (abs(self.SliceSpacing),) + sampling
  409. # Ensure that sampling has as many elements as shape
  410. sampling = (1.0,) * (len(shape) - len(sampling)) + sampling[-len(shape) :]
  411. # Set shape and sampling
  412. self._info["shape"] = shape
  413. self._info["sampling"] = sampling
  414. def _pixel_data_numpy(self):
  415. """Return a NumPy array of the pixel data."""
  416. # Taken from pydicom
  417. # Copyright (c) 2008-2012 Darcy Mason
  418. if "PixelData" not in self:
  419. raise TypeError("No pixel data found in this dataset.")
  420. # determine the type used for the array
  421. need_byteswap = self.is_little_endian != sys_is_little_endian
  422. # Make NumPy format code, e.g. "uint16", "int32" etc
  423. # from two pieces of info:
  424. # self.PixelRepresentation -- 0 for unsigned, 1 for signed;
  425. # self.BitsAllocated -- 8, 16, or 32
  426. format_str = "%sint%d" % (
  427. ("u", "")[self.PixelRepresentation],
  428. self.BitsAllocated,
  429. )
  430. try:
  431. numpy_format = np.dtype(format_str)
  432. except TypeError: # pragma: no cover
  433. raise TypeError(
  434. "Data type not understood by NumPy: format='%s', "
  435. " PixelRepresentation=%d, BitsAllocated=%d"
  436. % (numpy_format, self.PixelRepresentation, self.BitsAllocated)
  437. )
  438. # Have correct Numpy format, so create the NumPy array
  439. arr = np.frombuffer(self.PixelData, numpy_format).copy()
  440. # XXX byte swap - may later handle this in read_file!!?
  441. if need_byteswap:
  442. arr.byteswap(True) # True means swap in-place, don't make new copy
  443. # Note the following reshape operations return a new *view* onto arr,
  444. # but don't copy the data
  445. arr = arr.reshape(*self._info["shape"])
  446. return arr
  447. def _apply_slope_and_offset(self, data):
  448. """
  449. If RescaleSlope and RescaleIntercept are present in the data,
  450. apply them. The data type of the data is changed if necessary.
  451. """
  452. # Obtain slope and offset
  453. slope, offset = 1, 0
  454. needFloats, needApplySlopeOffset = False, False
  455. if "RescaleSlope" in self:
  456. needApplySlopeOffset = True
  457. slope = self.RescaleSlope
  458. if "RescaleIntercept" in self:
  459. needApplySlopeOffset = True
  460. offset = self.RescaleIntercept
  461. if int(slope) != slope or int(offset) != offset:
  462. needFloats = True
  463. if not needFloats:
  464. slope, offset = int(slope), int(offset)
  465. # Apply slope and offset
  466. if needApplySlopeOffset:
  467. # Maybe we need to change the datatype?
  468. if data.dtype in [np.float32, np.float64]:
  469. pass
  470. elif needFloats:
  471. data = data.astype(np.float32)
  472. else:
  473. # Determine required range
  474. minReq, maxReq = data.min().item(), data.max().item()
  475. minReq = min([minReq, minReq * slope + offset, maxReq * slope + offset])
  476. maxReq = max([maxReq, minReq * slope + offset, maxReq * slope + offset])
  477. # Determine required datatype from that
  478. dtype = None
  479. if minReq < 0:
  480. # Signed integer type
  481. maxReq = max([-minReq, maxReq])
  482. if maxReq < 2**7:
  483. dtype = np.int8
  484. elif maxReq < 2**15:
  485. dtype = np.int16
  486. elif maxReq < 2**31:
  487. dtype = np.int32
  488. else:
  489. dtype = np.float32
  490. else:
  491. # Unsigned integer type
  492. if maxReq < 2**8:
  493. dtype = np.int8
  494. elif maxReq < 2**16:
  495. dtype = np.int16
  496. elif maxReq < 2**32:
  497. dtype = np.int32
  498. else:
  499. dtype = np.float32
  500. # Change datatype
  501. if dtype != data.dtype:
  502. data = data.astype(dtype)
  503. # Apply slope and offset
  504. data *= slope
  505. data += offset
  506. # Done
  507. return data
  508. def _inflate(self):
  509. # Taken from pydicom
  510. # Copyright (c) 2008-2012 Darcy Mason
  511. import zlib
  512. from io import BytesIO
  513. # See PS3.6-2008 A.5 (p 71) -- when written, the entire dataset
  514. # following the file metadata was prepared the normal way,
  515. # then "deflate" compression applied.
  516. # All that is needed here is to decompress and then
  517. # use as normal in a file-like object
  518. zipped = self._file.read()
  519. # -MAX_WBITS part is from comp.lang.python answer:
  520. # groups.google.com/group/comp.lang.python/msg/e95b3b38a71e6799
  521. unzipped = zlib.decompress(zipped, -zlib.MAX_WBITS)
  522. self._file = BytesIO(unzipped) # a file-like object
  523. class DicomSeries(object):
  524. """DicomSeries
  525. This class represents a serie of dicom files (SimpleDicomReader
  526. objects) that belong together. If these are multiple files, they
  527. represent the slices of a volume (like for CT or MRI).
  528. """
  529. def __init__(self, suid, progressIndicator):
  530. # Init dataset list and the callback
  531. self._entries = []
  532. # Init props
  533. self._suid = suid
  534. self._info = {}
  535. self._progressIndicator = progressIndicator
  536. def __len__(self):
  537. return len(self._entries)
  538. def __iter__(self):
  539. return iter(self._entries)
  540. def __getitem__(self, index):
  541. return self._entries[index]
  542. @property
  543. def suid(self):
  544. return self._suid
  545. @property
  546. def shape(self):
  547. """The shape of the data (nz, ny, nx)."""
  548. return self._info["shape"]
  549. @property
  550. def sampling(self):
  551. """The sampling (voxel distances) of the data (dz, dy, dx)."""
  552. return self._info["sampling"]
  553. @property
  554. def info(self):
  555. """A dictionary containing the information as present in the
  556. first dicomfile of this serie. None if there are no entries."""
  557. return self._info
  558. @property
  559. def description(self):
  560. """A description of the dicom series. Used fields are
  561. PatientName, shape of the data, SeriesDescription, and
  562. ImageComments.
  563. """
  564. info = self.info
  565. # If no info available, return simple description
  566. if not info: # pragma: no cover
  567. return "DicomSeries containing %i images" % len(self)
  568. fields = []
  569. # Give patient name
  570. if "PatientName" in info:
  571. fields.append("" + info["PatientName"])
  572. # Also add dimensions
  573. if self.shape:
  574. tmp = [str(d) for d in self.shape]
  575. fields.append("x".join(tmp))
  576. # Try adding more fields
  577. if "SeriesDescription" in info:
  578. fields.append("'" + info["SeriesDescription"] + "'")
  579. if "ImageComments" in info:
  580. fields.append("'" + info["ImageComments"] + "'")
  581. # Combine
  582. return " ".join(fields)
  583. def __repr__(self):
  584. adr = hex(id(self)).upper()
  585. return "<DicomSeries with %i images at %s>" % (len(self), adr)
  586. def get_numpy_array(self):
  587. """Get (load) the data that this DicomSeries represents, and return
  588. it as a numpy array. If this serie contains multiple images, the
  589. resulting array is 3D, otherwise it's 2D.
  590. """
  591. # It's easy if no file or if just a single file
  592. if len(self) == 0:
  593. raise ValueError("Serie does not contain any files.")
  594. elif len(self) == 1:
  595. return self[0].get_numpy_array()
  596. # Check info
  597. if self.info is None:
  598. raise RuntimeError("Cannot return volume if series not finished.")
  599. # Init data (using what the dicom packaged produces as a reference)
  600. slice = self[0].get_numpy_array()
  601. vol = np.zeros(self.shape, dtype=slice.dtype)
  602. vol[0] = slice
  603. # Fill volume
  604. self._progressIndicator.start("loading data", "", len(self))
  605. for z in range(1, len(self)):
  606. vol[z] = self[z].get_numpy_array()
  607. self._progressIndicator.set_progress(z + 1)
  608. self._progressIndicator.finish()
  609. # Done
  610. import gc
  611. gc.collect()
  612. return vol
  613. def _append(self, dcm):
  614. self._entries.append(dcm)
  615. def _sort(self):
  616. self._entries.sort(
  617. key=lambda k: (
  618. k.InstanceNumber,
  619. (
  620. k.ImagePositionPatient[2]
  621. if hasattr(k, "ImagePositionPatient")
  622. else None
  623. ),
  624. )
  625. )
  626. def _finish(self):
  627. """
  628. Evaluate the series of dicom files. Together they should make up
  629. a volumetric dataset. This means the files should meet certain
  630. conditions. Also some additional information has to be calculated,
  631. such as the distance between the slices. This method sets the
  632. attributes for "shape", "sampling" and "info".
  633. This method checks:
  634. * that there are no missing files
  635. * that the dimensions of all images match
  636. * that the pixel spacing of all images match
  637. """
  638. # The datasets list should be sorted by instance number
  639. L = self._entries
  640. if len(L) == 0:
  641. return
  642. elif len(L) == 1:
  643. self._info = L[0].info
  644. return
  645. # Get previous
  646. ds1 = L[0]
  647. # Init measures to calculate average of
  648. distance_sum = 0.0
  649. # Init measures to check (these are in 2D)
  650. dimensions = ds1.Rows, ds1.Columns
  651. # sampling = float(ds1.PixelSpacing[0]), float(ds1.PixelSpacing[1])
  652. sampling = ds1.info["sampling"][:2] # row, column
  653. for index in range(len(L)):
  654. # The first round ds1 and ds2 will be the same, for the
  655. # distance calculation this does not matter
  656. # Get current
  657. ds2 = L[index]
  658. # Get positions
  659. pos1 = float(ds1.ImagePositionPatient[2])
  660. pos2 = float(ds2.ImagePositionPatient[2])
  661. # Update distance_sum to calculate distance later
  662. distance_sum += abs(pos1 - pos2)
  663. # Test measures
  664. dimensions2 = ds2.Rows, ds2.Columns
  665. # sampling2 = float(ds2.PixelSpacing[0]), float(ds2.PixelSpacing[1])
  666. sampling2 = ds2.info["sampling"][:2] # row, column
  667. if dimensions != dimensions2:
  668. # We cannot produce a volume if the dimensions match
  669. raise ValueError("Dimensions of slices does not match.")
  670. if sampling != sampling2:
  671. # We can still produce a volume, but we should notify the user
  672. self._progressIndicator.write("Warn: sampling does not match.")
  673. # Store previous
  674. ds1 = ds2
  675. # Finish calculating average distance
  676. # (Note that there are len(L)-1 distances)
  677. distance_mean = distance_sum / (len(L) - 1)
  678. # Set info dict
  679. self._info = L[0].info.copy()
  680. # Store information that is specific for the serie
  681. self._info["shape"] = (len(L),) + ds2.info["shape"]
  682. self._info["sampling"] = (distance_mean,) + ds2.info["sampling"]
  683. def list_files(files, path):
  684. """List all files in the directory, recursively."""
  685. for item in os.listdir(path):
  686. item = os.path.join(path, item)
  687. if os.path.isdir(item):
  688. list_files(files, item)
  689. elif os.path.isfile(item):
  690. files.append(item)
  691. def process_directory(request, progressIndicator, readPixelData=False):
  692. """
  693. Reads dicom files and returns a list of DicomSeries objects, which
  694. contain information about the data, and can be used to load the
  695. image or volume data.
  696. if readPixelData is True, the pixel data of all series is read. By
  697. default the loading of pixeldata is deferred until it is requested
  698. using the DicomSeries.get_pixel_array() method. In general, both
  699. methods should be equally fast.
  700. """
  701. # Get directory to examine
  702. if os.path.isdir(request.filename):
  703. path = request.filename
  704. elif os.path.isfile(request.filename):
  705. path = os.path.dirname(request.filename)
  706. else: # pragma: no cover - tested earlier
  707. raise ValueError("Dicom plugin needs a valid filename to examine the directory")
  708. # Check files
  709. files = []
  710. list_files(files, path) # Find files recursively
  711. # Gather file data and put in DicomSeries
  712. series = {}
  713. count = 0
  714. progressIndicator.start("examining files", "files", len(files))
  715. for filename in files:
  716. # Show progress (note that we always start with a 0.0)
  717. count += 1
  718. progressIndicator.set_progress(count)
  719. # Skip DICOMDIR files
  720. if filename.count("DICOMDIR"): # pragma: no cover
  721. continue
  722. # Try loading dicom ...
  723. try:
  724. dcm = SimpleDicomReader(filename)
  725. except NotADicomFile:
  726. continue # skip non-dicom file
  727. except Exception as why: # pragma: no cover
  728. progressIndicator.write(str(why))
  729. continue
  730. # Get SUID and register the file with an existing or new series object
  731. try:
  732. suid = dcm.SeriesInstanceUID
  733. except AttributeError: # pragma: no cover
  734. continue # some other kind of dicom file
  735. if suid not in series:
  736. series[suid] = DicomSeries(suid, progressIndicator)
  737. series[suid]._append(dcm)
  738. # Finish progress
  739. # progressIndicator.finish('Found %i series.' % len(series))
  740. # Make a list and sort, so that the order is deterministic
  741. series = list(series.values())
  742. series.sort(key=lambda x: x.suid)
  743. # Split series if necessary
  744. for serie in reversed([serie for serie in series]):
  745. splitSerieIfRequired(serie, series, progressIndicator)
  746. # Finish all series
  747. # progressIndicator.start('analyse series', '', len(series))
  748. series_ = []
  749. for i in range(len(series)):
  750. try:
  751. series[i]._finish()
  752. series_.append(series[i])
  753. except Exception as err: # pragma: no cover
  754. progressIndicator.write(str(err))
  755. pass # Skip serie (probably report-like file without pixels)
  756. # progressIndicator.set_progress(i+1)
  757. progressIndicator.finish("Found %i correct series." % len(series_))
  758. # Done
  759. return series_
  760. def splitSerieIfRequired(serie, series, progressIndicator):
  761. """
  762. Split the serie in multiple series if this is required. The choice
  763. is based on examing the image position relative to the previous
  764. image. If it differs too much, it is assumed that there is a new
  765. dataset. This can happen for example in unspitted gated CT data.
  766. """
  767. # Sort the original list and get local name
  768. serie._sort()
  769. L = serie._entries
  770. # Init previous slice
  771. ds1 = L[0]
  772. # Check whether we can do this
  773. if "ImagePositionPatient" not in ds1:
  774. return
  775. # Initialize a list of new lists
  776. L2 = [[ds1]]
  777. # Init slice distance estimate
  778. distance = 0
  779. for index in range(1, len(L)):
  780. # Get current slice
  781. ds2 = L[index]
  782. # Get positions
  783. pos1 = float(ds1.ImagePositionPatient[2])
  784. pos2 = float(ds2.ImagePositionPatient[2])
  785. # Get distances
  786. newDist = abs(pos1 - pos2)
  787. # deltaDist = abs(firstPos-pos2)
  788. # If the distance deviates more than 2x from what we've seen,
  789. # we can agree it's a new dataset.
  790. if distance and newDist > 2.1 * distance:
  791. L2.append([])
  792. distance = 0
  793. else:
  794. # Test missing file
  795. if distance and newDist > 1.5 * distance:
  796. progressIndicator.write(
  797. "Warning: missing file after %r" % ds1._filename
  798. )
  799. distance = newDist
  800. # Add to last list
  801. L2[-1].append(ds2)
  802. # Store previous
  803. ds1 = ds2
  804. # Split if we should
  805. if len(L2) > 1:
  806. # At what position are we now?
  807. i = series.index(serie)
  808. # Create new series
  809. series2insert = []
  810. for L in L2:
  811. newSerie = DicomSeries(serie.suid, progressIndicator)
  812. newSerie._entries = L
  813. series2insert.append(newSerie)
  814. # Insert series and remove self
  815. for newSerie in reversed(series2insert):
  816. series.insert(i, newSerie)
  817. series.remove(serie)