| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931 |
- # -*- coding: utf-8 -*-
- # imageio is distributed under the terms of the (new) BSD License.
- """Plugin for reading DICOM files."""
- # todo: Use pydicom:
- # * Note: is not py3k ready yet
- # * Allow reading the full meta info
- # I think we can more or less replace the SimpleDicomReader with a
- # pydicom.Dataset For series, only ned to read the full info from one
- # file: speed still high
- # * Perhaps allow writing?
- import sys
- import os
- import struct
- import logging
- import numpy as np
- logger = logging.getLogger(__name__)
- # Determine endianity of system
- sys_is_little_endian = sys.byteorder == "little"
- # Define a dictionary that contains the tags that we would like to know
- MINIDICT = {
- (0x7FE0, 0x0010): ("PixelData", "OB"),
- # Date and time
- (0x0008, 0x0020): ("StudyDate", "DA"),
- (0x0008, 0x0021): ("SeriesDate", "DA"),
- (0x0008, 0x0022): ("AcquisitionDate", "DA"),
- (0x0008, 0x0023): ("ContentDate", "DA"),
- (0x0008, 0x0030): ("StudyTime", "TM"),
- (0x0008, 0x0031): ("SeriesTime", "TM"),
- (0x0008, 0x0032): ("AcquisitionTime", "TM"),
- (0x0008, 0x0033): ("ContentTime", "TM"),
- # With what, where, by whom?
- (0x0008, 0x0060): ("Modality", "CS"),
- (0x0008, 0x0070): ("Manufacturer", "LO"),
- (0x0008, 0x0080): ("InstitutionName", "LO"),
- # Descriptions
- (0x0008, 0x1030): ("StudyDescription", "LO"),
- (0x0008, 0x103E): ("SeriesDescription", "LO"),
- # UID's
- (0x0008, 0x0016): ("SOPClassUID", "UI"),
- (0x0008, 0x0018): ("SOPInstanceUID", "UI"),
- (0x0020, 0x000D): ("StudyInstanceUID", "UI"),
- (0x0020, 0x000E): ("SeriesInstanceUID", "UI"),
- (0x0008, 0x0117): ("ContextUID", "UI"),
- # Numbers
- (0x0020, 0x0011): ("SeriesNumber", "IS"),
- (0x0020, 0x0012): ("AcquisitionNumber", "IS"),
- (0x0020, 0x0013): ("InstanceNumber", "IS"),
- (0x0020, 0x0014): ("IsotopeNumber", "IS"),
- (0x0020, 0x0015): ("PhaseNumber", "IS"),
- (0x0020, 0x0016): ("IntervalNumber", "IS"),
- (0x0020, 0x0017): ("TimeSlotNumber", "IS"),
- (0x0020, 0x0018): ("AngleNumber", "IS"),
- (0x0020, 0x0019): ("ItemNumber", "IS"),
- (0x0020, 0x0020): ("PatientOrientation", "CS"),
- (0x0020, 0x0030): ("ImagePosition", "CS"),
- (0x0020, 0x0032): ("ImagePositionPatient", "CS"),
- (0x0020, 0x0035): ("ImageOrientation", "CS"),
- (0x0020, 0x0037): ("ImageOrientationPatient", "CS"),
- # Patient information
- (0x0010, 0x0010): ("PatientName", "PN"),
- (0x0010, 0x0020): ("PatientID", "LO"),
- (0x0010, 0x0030): ("PatientBirthDate", "DA"),
- (0x0010, 0x0040): ("PatientSex", "CS"),
- (0x0010, 0x1010): ("PatientAge", "AS"),
- (0x0010, 0x1020): ("PatientSize", "DS"),
- (0x0010, 0x1030): ("PatientWeight", "DS"),
- # Image specific (required to construct numpy array)
- (0x0028, 0x0002): ("SamplesPerPixel", "US"),
- (0x0028, 0x0008): ("NumberOfFrames", "IS"),
- (0x0028, 0x0100): ("BitsAllocated", "US"),
- (0x0028, 0x0101): ("BitsStored", "US"),
- (0x0028, 0x0102): ("HighBit", "US"),
- (0x0028, 0x0103): ("PixelRepresentation", "US"),
- (0x0028, 0x0010): ("Rows", "US"),
- (0x0028, 0x0011): ("Columns", "US"),
- (0x0028, 0x1052): ("RescaleIntercept", "DS"),
- (0x0028, 0x1053): ("RescaleSlope", "DS"),
- # Image specific (for the user)
- (0x0028, 0x0030): ("PixelSpacing", "DS"),
- (0x0018, 0x0088): ("SliceSpacing", "DS"),
- }
- # Define some special tags:
- # See PS 3.5-2008 section 7.5 (p.40)
- ItemTag = (0xFFFE, 0xE000) # start of Sequence Item
- ItemDelimiterTag = (0xFFFE, 0xE00D) # end of Sequence Item
- SequenceDelimiterTag = (0xFFFE, 0xE0DD) # end of Sequence of undefined length
- # Define set of groups that we're interested in (so we can quickly skip others)
- GROUPS = set([key[0] for key in MINIDICT.keys()])
- VRS = set([val[1] for val in MINIDICT.values()])
- class NotADicomFile(Exception):
- pass
- class CompressedDicom(RuntimeError):
- pass
- class SimpleDicomReader(object):
- """
- This class provides reading of pixel data from DICOM files. It is
- focussed on getting the pixel data, not the meta info.
- To use, first create an instance of this class (giving it
- a file object or filename). Next use the info attribute to
- get a dict of the meta data. The loading of pixel data is
- deferred until get_numpy_array() is called.
- Comparison with Pydicom
- -----------------------
- This code focusses on getting the pixel data out, which allows some
- shortcuts, resulting in the code being much smaller.
- Since the processing of data elements is much cheaper (it skips a lot
- of tags), this code is about 3x faster than pydicom (except for the
- deflated DICOM files).
- This class does borrow some code (and ideas) from the pydicom
- project, and (to the best of our knowledge) has the same limitations
- as pydicom with regard to the type of files that it can handle.
- Limitations
- -----------
- For more advanced DICOM processing, please check out pydicom.
- * Only a predefined subset of data elements (meta information) is read.
- * This is a reader; it can not write DICOM files.
- * (just like pydicom) it can handle none of the compressed DICOM
- formats except for "Deflated Explicit VR Little Endian"
- (1.2.840.10008.1.2.1.99).
- """
- def __init__(self, file):
- # Open file if filename given
- if isinstance(file, str):
- self._filename = file
- self._file = open(file, "rb")
- else:
- self._filename = "<unknown file>"
- self._file = file
- # Init variable to store position and size of pixel data
- self._pixel_data_loc = None
- # The meta header is always explicit and little endian
- self.is_implicit_VR = False
- self.is_little_endian = True
- self._unpackPrefix = "<"
- # Dict to store data elements of interest in
- self._info = {}
- # VR Conversion
- self._converters = {
- # Numbers
- "US": lambda x: self._unpack("H", x),
- "UL": lambda x: self._unpack("L", x),
- # Numbers encoded as strings
- "DS": lambda x: self._splitValues(x, float, "\\"),
- "IS": lambda x: self._splitValues(x, int, "\\"),
- # strings
- "AS": lambda x: x.decode("ascii", "ignore").strip("\x00"),
- "DA": lambda x: x.decode("ascii", "ignore").strip("\x00"),
- "TM": lambda x: x.decode("ascii", "ignore").strip("\x00"),
- "UI": lambda x: x.decode("ascii", "ignore").strip("\x00"),
- "LO": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
- "CS": lambda x: self._splitValues(x, float, "\\"),
- "PN": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
- }
- # Initiate reading
- self._read()
- @property
- def info(self):
- return self._info
- def _splitValues(self, x, type, splitter):
- s = x.decode("ascii").strip("\x00")
- try:
- if splitter in s:
- return tuple([type(v) for v in s.split(splitter) if v.strip()])
- else:
- return type(s)
- except ValueError:
- return s
- def _unpack(self, fmt, value):
- return struct.unpack(self._unpackPrefix + fmt, value)[0]
- # Really only so we need minimal changes to _pixel_data_numpy
- def __iter__(self):
- return iter(self._info.keys())
- def __getattr__(self, key):
- info = object.__getattribute__(self, "_info")
- if key in info:
- return info[key]
- return object.__getattribute__(self, key) # pragma: no cover
- def _read(self):
- f = self._file
- # Check prefix after peamble
- f.seek(128)
- if f.read(4) != b"DICM":
- raise NotADicomFile("Not a valid DICOM file.")
- # Read
- self._read_header()
- self._read_data_elements()
- self._get_shape_and_sampling()
- # Close if done, reopen if necessary to read pixel data
- if os.path.isfile(self._filename):
- self._file.close()
- self._file = None
- def _readDataElement(self):
- f = self._file
- # Get group and element
- group = self._unpack("H", f.read(2))
- element = self._unpack("H", f.read(2))
- # Get value length
- if self.is_implicit_VR:
- vl = self._unpack("I", f.read(4))
- else:
- vr = f.read(2)
- if vr in (b"OB", b"OW", b"SQ", b"UN"):
- reserved = f.read(2) # noqa
- vl = self._unpack("I", f.read(4))
- else:
- vl = self._unpack("H", f.read(2))
- # Get value
- if group == 0x7FE0 and element == 0x0010:
- here = f.tell()
- self._pixel_data_loc = here, vl
- f.seek(here + vl)
- return group, element, b"Deferred loading of pixel data"
- else:
- if vl == 0xFFFFFFFF:
- value = self._read_undefined_length_value()
- else:
- value = f.read(vl)
- return group, element, value
- def _read_undefined_length_value(self, read_size=128):
- """Copied (in compacted form) from PyDicom
- Copyright Darcy Mason.
- """
- fp = self._file
- # data_start = fp.tell()
- search_rewind = 3
- bytes_to_find = struct.pack(
- self._unpackPrefix + "HH", SequenceDelimiterTag[0], SequenceDelimiterTag[1]
- )
- found = False
- value_chunks = []
- while not found:
- chunk_start = fp.tell()
- bytes_read = fp.read(read_size)
- if len(bytes_read) < read_size:
- # try again,
- # if still don't get required amount, this is last block
- new_bytes = fp.read(read_size - len(bytes_read))
- bytes_read += new_bytes
- if len(bytes_read) < read_size:
- raise EOFError(
- "End of file reached before sequence " "delimiter found."
- )
- index = bytes_read.find(bytes_to_find)
- if index != -1:
- found = True
- value_chunks.append(bytes_read[:index])
- fp.seek(chunk_start + index + 4) # rewind to end of delimiter
- length = fp.read(4)
- if length != b"\0\0\0\0":
- logger.warning(
- "Expected 4 zero bytes after undefined length " "delimiter"
- )
- else:
- fp.seek(fp.tell() - search_rewind) # rewind a bit
- # accumulate the bytes read (not including the rewind)
- value_chunks.append(bytes_read[:-search_rewind])
- # if get here then have found the byte string
- return b"".join(value_chunks)
- def _read_header(self):
- f = self._file
- TransferSyntaxUID = None
- # Read all elements, store transferSyntax when we encounter it
- try:
- while True:
- fp_save = f.tell()
- # Get element
- group, element, value = self._readDataElement()
- if group == 0x02:
- if group == 0x02 and element == 0x10:
- TransferSyntaxUID = value.decode("ascii").strip("\x00")
- else:
- # No more group 2: rewind and break
- # (don't trust group length)
- f.seek(fp_save)
- break
- except (EOFError, struct.error): # pragma: no cover
- raise RuntimeError("End of file reached while still in header.")
- # Handle transfer syntax
- self._info["TransferSyntaxUID"] = TransferSyntaxUID
- #
- if TransferSyntaxUID is None:
- # Assume ExplicitVRLittleEndian
- is_implicit_VR, is_little_endian = False, True
- elif TransferSyntaxUID == "1.2.840.10008.1.2.1":
- # ExplicitVRLittleEndian
- is_implicit_VR, is_little_endian = False, True
- elif TransferSyntaxUID == "1.2.840.10008.1.2.2":
- # ExplicitVRBigEndian
- is_implicit_VR, is_little_endian = False, False
- elif TransferSyntaxUID == "1.2.840.10008.1.2":
- # implicit VR little endian
- is_implicit_VR, is_little_endian = True, True
- elif TransferSyntaxUID == "1.2.840.10008.1.2.1.99":
- # DeflatedExplicitVRLittleEndian:
- is_implicit_VR, is_little_endian = False, True
- self._inflate()
- else:
- # http://www.dicomlibrary.com/dicom/transfer-syntax/
- t, extra_info = TransferSyntaxUID, ""
- if "1.2.840.10008.1.2.4.50" <= t < "1.2.840.10008.1.2.4.99":
- extra_info = " (JPEG)"
- if "1.2.840.10008.1.2.4.90" <= t < "1.2.840.10008.1.2.4.99":
- extra_info = " (JPEG 2000)"
- if t == "1.2.840.10008.1.2.5":
- extra_info = " (RLE)"
- if t == "1.2.840.10008.1.2.6.1":
- extra_info = " (RFC 2557)"
- raise CompressedDicom(
- "The dicom reader can only read files with "
- "uncompressed image data - not %r%s. You "
- "can try using dcmtk or gdcm to convert the "
- "image." % (t, extra_info)
- )
- # From hereon, use implicit/explicit big/little endian
- self.is_implicit_VR = is_implicit_VR
- self.is_little_endian = is_little_endian
- self._unpackPrefix = "><"[is_little_endian]
- def _read_data_elements(self):
- info = self._info
- try:
- while True:
- # Get element
- group, element, value = self._readDataElement()
- # Is it a group we are interested in?
- if group in GROUPS:
- key = (group, element)
- name, vr = MINIDICT.get(key, (None, None))
- # Is it an element we are interested in?
- if name:
- # Store value
- converter = self._converters.get(vr, lambda x: x)
- info[name] = converter(value)
- except (EOFError, struct.error):
- pass # end of file ...
- def get_numpy_array(self):
- """Get numpy arra for this DICOM file, with the correct shape,
- and pixel values scaled appropriately.
- """
- # Is there pixel data at all?
- if "PixelData" not in self:
- raise TypeError("No pixel data found in this dataset.")
- # Load it now if it was not already loaded
- if self._pixel_data_loc and len(self.PixelData) < 100:
- # Reopen file?
- close_file = False
- if self._file is None:
- close_file = True
- self._file = open(self._filename, "rb")
- # Read data
- self._file.seek(self._pixel_data_loc[0])
- if self._pixel_data_loc[1] == 0xFFFFFFFF:
- value = self._read_undefined_length_value()
- else:
- value = self._file.read(self._pixel_data_loc[1])
- # Close file
- if close_file:
- self._file.close()
- self._file = None
- # Overwrite
- self._info["PixelData"] = value
- # Get data
- data = self._pixel_data_numpy()
- data = self._apply_slope_and_offset(data)
- # Remove data again to preserve memory
- # Note that the data for the original file is loaded twice ...
- self._info["PixelData"] = (
- b"Data converted to numpy array, " + b"raw data removed to preserve memory"
- )
- return data
- def _get_shape_and_sampling(self):
- """Get shape and sampling without actuall using the pixel data.
- In this way, the user can get an idea what's inside without having
- to load it.
- """
- # Get shape (in the same way that pydicom does)
- if "NumberOfFrames" in self and self.NumberOfFrames > 1:
- if self.SamplesPerPixel > 1:
- shape = (
- self.SamplesPerPixel,
- self.NumberOfFrames,
- self.Rows,
- self.Columns,
- )
- else:
- shape = self.NumberOfFrames, self.Rows, self.Columns
- elif "SamplesPerPixel" in self:
- if self.SamplesPerPixel > 1:
- if self.BitsAllocated == 8:
- shape = self.SamplesPerPixel, self.Rows, self.Columns
- else:
- raise NotImplementedError(
- "DICOM plugin only handles "
- "SamplesPerPixel > 1 if Bits "
- "Allocated = 8"
- )
- else:
- shape = self.Rows, self.Columns
- else:
- raise RuntimeError(
- "DICOM file has no SamplesPerPixel " "(perhaps this is a report?)"
- )
- # Try getting sampling between pixels
- if "PixelSpacing" in self:
- sampling = float(self.PixelSpacing[0]), float(self.PixelSpacing[1])
- else:
- sampling = 1.0, 1.0
- if "SliceSpacing" in self:
- sampling = (abs(self.SliceSpacing),) + sampling
- # Ensure that sampling has as many elements as shape
- sampling = (1.0,) * (len(shape) - len(sampling)) + sampling[-len(shape) :]
- # Set shape and sampling
- self._info["shape"] = shape
- self._info["sampling"] = sampling
- def _pixel_data_numpy(self):
- """Return a NumPy array of the pixel data."""
- # Taken from pydicom
- # Copyright (c) 2008-2012 Darcy Mason
- if "PixelData" not in self:
- raise TypeError("No pixel data found in this dataset.")
- # determine the type used for the array
- need_byteswap = self.is_little_endian != sys_is_little_endian
- # Make NumPy format code, e.g. "uint16", "int32" etc
- # from two pieces of info:
- # self.PixelRepresentation -- 0 for unsigned, 1 for signed;
- # self.BitsAllocated -- 8, 16, or 32
- format_str = "%sint%d" % (
- ("u", "")[self.PixelRepresentation],
- self.BitsAllocated,
- )
- try:
- numpy_format = np.dtype(format_str)
- except TypeError: # pragma: no cover
- raise TypeError(
- "Data type not understood by NumPy: format='%s', "
- " PixelRepresentation=%d, BitsAllocated=%d"
- % (numpy_format, self.PixelRepresentation, self.BitsAllocated)
- )
- # Have correct Numpy format, so create the NumPy array
- arr = np.frombuffer(self.PixelData, numpy_format).copy()
- # XXX byte swap - may later handle this in read_file!!?
- if need_byteswap:
- arr.byteswap(True) # True means swap in-place, don't make new copy
- # Note the following reshape operations return a new *view* onto arr,
- # but don't copy the data
- arr = arr.reshape(*self._info["shape"])
- return arr
- def _apply_slope_and_offset(self, data):
- """
- If RescaleSlope and RescaleIntercept are present in the data,
- apply them. The data type of the data is changed if necessary.
- """
- # Obtain slope and offset
- slope, offset = 1, 0
- needFloats, needApplySlopeOffset = False, False
- if "RescaleSlope" in self:
- needApplySlopeOffset = True
- slope = self.RescaleSlope
- if "RescaleIntercept" in self:
- needApplySlopeOffset = True
- offset = self.RescaleIntercept
- if int(slope) != slope or int(offset) != offset:
- needFloats = True
- if not needFloats:
- slope, offset = int(slope), int(offset)
- # Apply slope and offset
- if needApplySlopeOffset:
- # Maybe we need to change the datatype?
- if data.dtype in [np.float32, np.float64]:
- pass
- elif needFloats:
- data = data.astype(np.float32)
- else:
- # Determine required range
- minReq, maxReq = data.min().item(), data.max().item()
- minReq = min([minReq, minReq * slope + offset, maxReq * slope + offset])
- maxReq = max([maxReq, minReq * slope + offset, maxReq * slope + offset])
- # Determine required datatype from that
- dtype = None
- if minReq < 0:
- # Signed integer type
- maxReq = max([-minReq, maxReq])
- if maxReq < 2**7:
- dtype = np.int8
- elif maxReq < 2**15:
- dtype = np.int16
- elif maxReq < 2**31:
- dtype = np.int32
- else:
- dtype = np.float32
- else:
- # Unsigned integer type
- if maxReq < 2**8:
- dtype = np.int8
- elif maxReq < 2**16:
- dtype = np.int16
- elif maxReq < 2**32:
- dtype = np.int32
- else:
- dtype = np.float32
- # Change datatype
- if dtype != data.dtype:
- data = data.astype(dtype)
- # Apply slope and offset
- data *= slope
- data += offset
- # Done
- return data
- def _inflate(self):
- # Taken from pydicom
- # Copyright (c) 2008-2012 Darcy Mason
- import zlib
- from io import BytesIO
- # See PS3.6-2008 A.5 (p 71) -- when written, the entire dataset
- # following the file metadata was prepared the normal way,
- # then "deflate" compression applied.
- # All that is needed here is to decompress and then
- # use as normal in a file-like object
- zipped = self._file.read()
- # -MAX_WBITS part is from comp.lang.python answer:
- # groups.google.com/group/comp.lang.python/msg/e95b3b38a71e6799
- unzipped = zlib.decompress(zipped, -zlib.MAX_WBITS)
- self._file = BytesIO(unzipped) # a file-like object
- class DicomSeries(object):
- """DicomSeries
- This class represents a serie of dicom files (SimpleDicomReader
- objects) that belong together. If these are multiple files, they
- represent the slices of a volume (like for CT or MRI).
- """
- def __init__(self, suid, progressIndicator):
- # Init dataset list and the callback
- self._entries = []
- # Init props
- self._suid = suid
- self._info = {}
- self._progressIndicator = progressIndicator
- def __len__(self):
- return len(self._entries)
- def __iter__(self):
- return iter(self._entries)
- def __getitem__(self, index):
- return self._entries[index]
- @property
- def suid(self):
- return self._suid
- @property
- def shape(self):
- """The shape of the data (nz, ny, nx)."""
- return self._info["shape"]
- @property
- def sampling(self):
- """The sampling (voxel distances) of the data (dz, dy, dx)."""
- return self._info["sampling"]
- @property
- def info(self):
- """A dictionary containing the information as present in the
- first dicomfile of this serie. None if there are no entries."""
- return self._info
- @property
- def description(self):
- """A description of the dicom series. Used fields are
- PatientName, shape of the data, SeriesDescription, and
- ImageComments.
- """
- info = self.info
- # If no info available, return simple description
- if not info: # pragma: no cover
- return "DicomSeries containing %i images" % len(self)
- fields = []
- # Give patient name
- if "PatientName" in info:
- fields.append("" + info["PatientName"])
- # Also add dimensions
- if self.shape:
- tmp = [str(d) for d in self.shape]
- fields.append("x".join(tmp))
- # Try adding more fields
- if "SeriesDescription" in info:
- fields.append("'" + info["SeriesDescription"] + "'")
- if "ImageComments" in info:
- fields.append("'" + info["ImageComments"] + "'")
- # Combine
- return " ".join(fields)
- def __repr__(self):
- adr = hex(id(self)).upper()
- return "<DicomSeries with %i images at %s>" % (len(self), adr)
- def get_numpy_array(self):
- """Get (load) the data that this DicomSeries represents, and return
- it as a numpy array. If this serie contains multiple images, the
- resulting array is 3D, otherwise it's 2D.
- """
- # It's easy if no file or if just a single file
- if len(self) == 0:
- raise ValueError("Serie does not contain any files.")
- elif len(self) == 1:
- return self[0].get_numpy_array()
- # Check info
- if self.info is None:
- raise RuntimeError("Cannot return volume if series not finished.")
- # Init data (using what the dicom packaged produces as a reference)
- slice = self[0].get_numpy_array()
- vol = np.zeros(self.shape, dtype=slice.dtype)
- vol[0] = slice
- # Fill volume
- self._progressIndicator.start("loading data", "", len(self))
- for z in range(1, len(self)):
- vol[z] = self[z].get_numpy_array()
- self._progressIndicator.set_progress(z + 1)
- self._progressIndicator.finish()
- # Done
- import gc
- gc.collect()
- return vol
- def _append(self, dcm):
- self._entries.append(dcm)
- def _sort(self):
- self._entries.sort(
- key=lambda k: (
- k.InstanceNumber,
- (
- k.ImagePositionPatient[2]
- if hasattr(k, "ImagePositionPatient")
- else None
- ),
- )
- )
- def _finish(self):
- """
- Evaluate the series of dicom files. Together they should make up
- a volumetric dataset. This means the files should meet certain
- conditions. Also some additional information has to be calculated,
- such as the distance between the slices. This method sets the
- attributes for "shape", "sampling" and "info".
- This method checks:
- * that there are no missing files
- * that the dimensions of all images match
- * that the pixel spacing of all images match
- """
- # The datasets list should be sorted by instance number
- L = self._entries
- if len(L) == 0:
- return
- elif len(L) == 1:
- self._info = L[0].info
- return
- # Get previous
- ds1 = L[0]
- # Init measures to calculate average of
- distance_sum = 0.0
- # Init measures to check (these are in 2D)
- dimensions = ds1.Rows, ds1.Columns
- # sampling = float(ds1.PixelSpacing[0]), float(ds1.PixelSpacing[1])
- sampling = ds1.info["sampling"][:2] # row, column
- for index in range(len(L)):
- # The first round ds1 and ds2 will be the same, for the
- # distance calculation this does not matter
- # Get current
- ds2 = L[index]
- # Get positions
- pos1 = float(ds1.ImagePositionPatient[2])
- pos2 = float(ds2.ImagePositionPatient[2])
- # Update distance_sum to calculate distance later
- distance_sum += abs(pos1 - pos2)
- # Test measures
- dimensions2 = ds2.Rows, ds2.Columns
- # sampling2 = float(ds2.PixelSpacing[0]), float(ds2.PixelSpacing[1])
- sampling2 = ds2.info["sampling"][:2] # row, column
- if dimensions != dimensions2:
- # We cannot produce a volume if the dimensions match
- raise ValueError("Dimensions of slices does not match.")
- if sampling != sampling2:
- # We can still produce a volume, but we should notify the user
- self._progressIndicator.write("Warn: sampling does not match.")
- # Store previous
- ds1 = ds2
- # Finish calculating average distance
- # (Note that there are len(L)-1 distances)
- distance_mean = distance_sum / (len(L) - 1)
- # Set info dict
- self._info = L[0].info.copy()
- # Store information that is specific for the serie
- self._info["shape"] = (len(L),) + ds2.info["shape"]
- self._info["sampling"] = (distance_mean,) + ds2.info["sampling"]
- def list_files(files, path):
- """List all files in the directory, recursively."""
- for item in os.listdir(path):
- item = os.path.join(path, item)
- if os.path.isdir(item):
- list_files(files, item)
- elif os.path.isfile(item):
- files.append(item)
- def process_directory(request, progressIndicator, readPixelData=False):
- """
- Reads dicom files and returns a list of DicomSeries objects, which
- contain information about the data, and can be used to load the
- image or volume data.
- if readPixelData is True, the pixel data of all series is read. By
- default the loading of pixeldata is deferred until it is requested
- using the DicomSeries.get_pixel_array() method. In general, both
- methods should be equally fast.
- """
- # Get directory to examine
- if os.path.isdir(request.filename):
- path = request.filename
- elif os.path.isfile(request.filename):
- path = os.path.dirname(request.filename)
- else: # pragma: no cover - tested earlier
- raise ValueError("Dicom plugin needs a valid filename to examine the directory")
- # Check files
- files = []
- list_files(files, path) # Find files recursively
- # Gather file data and put in DicomSeries
- series = {}
- count = 0
- progressIndicator.start("examining files", "files", len(files))
- for filename in files:
- # Show progress (note that we always start with a 0.0)
- count += 1
- progressIndicator.set_progress(count)
- # Skip DICOMDIR files
- if filename.count("DICOMDIR"): # pragma: no cover
- continue
- # Try loading dicom ...
- try:
- dcm = SimpleDicomReader(filename)
- except NotADicomFile:
- continue # skip non-dicom file
- except Exception as why: # pragma: no cover
- progressIndicator.write(str(why))
- continue
- # Get SUID and register the file with an existing or new series object
- try:
- suid = dcm.SeriesInstanceUID
- except AttributeError: # pragma: no cover
- continue # some other kind of dicom file
- if suid not in series:
- series[suid] = DicomSeries(suid, progressIndicator)
- series[suid]._append(dcm)
- # Finish progress
- # progressIndicator.finish('Found %i series.' % len(series))
- # Make a list and sort, so that the order is deterministic
- series = list(series.values())
- series.sort(key=lambda x: x.suid)
- # Split series if necessary
- for serie in reversed([serie for serie in series]):
- splitSerieIfRequired(serie, series, progressIndicator)
- # Finish all series
- # progressIndicator.start('analyse series', '', len(series))
- series_ = []
- for i in range(len(series)):
- try:
- series[i]._finish()
- series_.append(series[i])
- except Exception as err: # pragma: no cover
- progressIndicator.write(str(err))
- pass # Skip serie (probably report-like file without pixels)
- # progressIndicator.set_progress(i+1)
- progressIndicator.finish("Found %i correct series." % len(series_))
- # Done
- return series_
- def splitSerieIfRequired(serie, series, progressIndicator):
- """
- Split the serie in multiple series if this is required. The choice
- is based on examing the image position relative to the previous
- image. If it differs too much, it is assumed that there is a new
- dataset. This can happen for example in unspitted gated CT data.
- """
- # Sort the original list and get local name
- serie._sort()
- L = serie._entries
- # Init previous slice
- ds1 = L[0]
- # Check whether we can do this
- if "ImagePositionPatient" not in ds1:
- return
- # Initialize a list of new lists
- L2 = [[ds1]]
- # Init slice distance estimate
- distance = 0
- for index in range(1, len(L)):
- # Get current slice
- ds2 = L[index]
- # Get positions
- pos1 = float(ds1.ImagePositionPatient[2])
- pos2 = float(ds2.ImagePositionPatient[2])
- # Get distances
- newDist = abs(pos1 - pos2)
- # deltaDist = abs(firstPos-pos2)
- # If the distance deviates more than 2x from what we've seen,
- # we can agree it's a new dataset.
- if distance and newDist > 2.1 * distance:
- L2.append([])
- distance = 0
- else:
- # Test missing file
- if distance and newDist > 1.5 * distance:
- progressIndicator.write(
- "Warning: missing file after %r" % ds1._filename
- )
- distance = newDist
- # Add to last list
- L2[-1].append(ds2)
- # Store previous
- ds1 = ds2
- # Split if we should
- if len(L2) > 1:
- # At what position are we now?
- i = series.index(serie)
- # Create new series
- series2insert = []
- for L in L2:
- newSerie = DicomSeries(serie.suid, progressIndicator)
- newSerie._entries = L
- series2insert.append(newSerie)
- # Insert series and remove self
- for newSerie in reversed(series2insert):
- series.insert(i, newSerie)
- series.remove(serie)
|