| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124 |
- import numpy as np
- from scipy.spatial import cKDTree, distance
- def _ensure_spacing(coord, spacing, p_norm, max_out):
- """Returns a subset of coord where a minimum spacing is guaranteed.
- Parameters
- ----------
- coord : ndarray
- The coordinates of the considered points.
- spacing : float
- the maximum allowed spacing between the points.
- p_norm : float
- Which Minkowski p-norm to use. Should be in the range [1, inf].
- A finite large p may cause a ValueError if overflow can occur.
- ``inf`` corresponds to the Chebyshev distance and 2 to the
- Euclidean distance.
- max_out : int
- If not None, at most the first ``max_out`` candidates are
- returned.
- Returns
- -------
- output : ndarray
- A subset of coord where a minimum spacing is guaranteed.
- """
- # Use KDtree to find the peaks that are too close to each other
- tree = cKDTree(coord)
- indices = tree.query_ball_point(coord, r=spacing, p=p_norm)
- rejected_peaks_indices = set()
- naccepted = 0
- for idx, candidates in enumerate(indices):
- if idx not in rejected_peaks_indices:
- # keep current point and the points at exactly spacing from it
- candidates.remove(idx)
- dist = distance.cdist(
- [coord[idx]], coord[candidates], "minkowski", p=p_norm
- ).reshape(-1)
- candidates = [c for c, d in zip(candidates, dist) if d < spacing]
- # candidates.remove(keep)
- rejected_peaks_indices.update(candidates)
- naccepted += 1
- if max_out is not None and naccepted >= max_out:
- break
- # Remove the peaks that are too close to each other
- output = np.delete(coord, tuple(rejected_peaks_indices), axis=0)
- if max_out is not None:
- output = output[:max_out]
- return output
- def ensure_spacing(
- coords,
- spacing=1,
- p_norm=np.inf,
- min_split_size=50,
- max_out=None,
- *,
- max_split_size=2000,
- ):
- """Returns a subset of coord where a minimum spacing is guaranteed.
- Parameters
- ----------
- coords : array_like
- The coordinates of the considered points.
- spacing : float
- the maximum allowed spacing between the points.
- p_norm : float
- Which Minkowski p-norm to use. Should be in the range [1, inf].
- A finite large p may cause a ValueError if overflow can occur.
- ``inf`` corresponds to the Chebyshev distance and 2 to the
- Euclidean distance.
- min_split_size : int
- Minimum split size used to process ``coords`` by batch to save
- memory. If None, the memory saving strategy is not applied.
- max_out : int
- If not None, only the first ``max_out`` candidates are returned.
- max_split_size : int
- Maximum split size used to process ``coords`` by batch to save
- memory. This number was decided by profiling with a large number
- of points. Too small a number results in too much looping in
- Python instead of C, slowing down the process, while too large
- a number results in large memory allocations, slowdowns, and,
- potentially, in the process being killed -- see gh-6010. See
- benchmark results `here
- <https://github.com/scikit-image/scikit-image/pull/6035#discussion_r751518691>`_.
- Returns
- -------
- output : array_like
- A subset of coord where a minimum spacing is guaranteed.
- """
- output = coords
- if len(coords):
- coords = np.atleast_2d(coords)
- if min_split_size is None:
- batch_list = [coords]
- else:
- coord_count = len(coords)
- split_idx = [min_split_size]
- split_size = min_split_size
- while coord_count - split_idx[-1] > max_split_size:
- split_size *= 2
- split_idx.append(split_idx[-1] + min(split_size, max_split_size))
- batch_list = np.array_split(coords, split_idx)
- output = np.zeros((0, coords.shape[1]), dtype=coords.dtype)
- for batch in batch_list:
- output = _ensure_spacing(
- np.vstack([output, batch]), spacing, p_norm, max_out
- )
- if max_out is not None and len(output) >= max_out:
- break
- return output
|