unique.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. import numpy as np
  2. def unique_rows(ar):
  3. """Remove repeated rows from a 2D array.
  4. In particular, if given an array of coordinates of shape
  5. (Npoints, Ndim), it will remove repeated points.
  6. Parameters
  7. ----------
  8. ar : ndarray, shape (M, N)
  9. The input array.
  10. Returns
  11. -------
  12. ar_out : ndarray, shape (P, N)
  13. A copy of the input array with repeated rows removed.
  14. Raises
  15. ------
  16. ValueError : if `ar` is not two-dimensional.
  17. Notes
  18. -----
  19. The function will generate a copy of `ar` if it is not
  20. C-contiguous, which will negatively affect performance for large
  21. input arrays.
  22. Examples
  23. --------
  24. >>> ar = np.array([[1, 0, 1],
  25. ... [0, 1, 0],
  26. ... [1, 0, 1]], np.uint8)
  27. >>> unique_rows(ar)
  28. array([[0, 1, 0],
  29. [1, 0, 1]], dtype=uint8)
  30. """
  31. if ar.ndim != 2:
  32. raise ValueError(
  33. "unique_rows() only makes sense for 2D arrays, " f"got {ar.ndim}"
  34. )
  35. # the view in the next line only works if the array is C-contiguous
  36. ar = np.ascontiguousarray(ar)
  37. # np.unique() finds identical items in a raveled array. To make it
  38. # see each row as a single item, we create a view of each row as a
  39. # byte string of length itemsize times number of columns in `ar`
  40. ar_row_view = ar.view(f"|S{ar.itemsize * ar.shape[1]}")
  41. _, unique_row_indices = np.unique(ar_row_view, return_index=True)
  42. ar_out = ar[unique_row_indices]
  43. return ar_out