sift.py 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771
  1. import math
  2. import numpy as np
  3. import scipy.ndimage as ndi
  4. from .._shared.utils import check_nD, _supported_float_type
  5. from ..feature.util import DescriptorExtractor, FeatureDetector
  6. from .._shared.filters import gaussian
  7. from ..transform import rescale
  8. from ..util import img_as_float
  9. from ._sift import _local_max, _ori_distances, _update_histogram
  10. def _edgeness(hxx, hyy, hxy):
  11. """Compute edgeness (eq. 18 of Otero et. al. IPOL paper)"""
  12. trace = hxx + hyy
  13. determinant = hxx * hyy - hxy * hxy
  14. return (trace * trace) / determinant
  15. def _sparse_gradient(vol, positions):
  16. """Gradient of a 3D volume at the provided `positions`.
  17. For SIFT we only need the gradient at specific positions and do not need
  18. the gradient at the edge positions, so can just use this simple
  19. implementation instead of numpy.gradient.
  20. """
  21. p0 = positions[..., 0]
  22. p1 = positions[..., 1]
  23. p2 = positions[..., 2]
  24. g0 = vol[p0 + 1, p1, p2] - vol[p0 - 1, p1, p2]
  25. g0 *= 0.5
  26. g1 = vol[p0, p1 + 1, p2] - vol[p0, p1 - 1, p2]
  27. g1 *= 0.5
  28. g2 = vol[p0, p1, p2 + 1] - vol[p0, p1, p2 - 1]
  29. g2 *= 0.5
  30. return g0, g1, g2
  31. def _hessian(d, positions):
  32. """Compute the non-redundant 3D Hessian terms at the requested positions.
  33. Source: "Anatomy of the SIFT Method" p.380 (13)
  34. """
  35. p0 = positions[..., 0]
  36. p1 = positions[..., 1]
  37. p2 = positions[..., 2]
  38. two_d0 = 2 * d[p0, p1, p2]
  39. # 0 = row, 1 = col, 2 = octave
  40. h00 = d[p0 - 1, p1, p2] + d[p0 + 1, p1, p2] - two_d0
  41. h11 = d[p0, p1 - 1, p2] + d[p0, p1 + 1, p2] - two_d0
  42. h22 = d[p0, p1, p2 - 1] + d[p0, p1, p2 + 1] - two_d0
  43. h01 = 0.25 * (
  44. d[p0 + 1, p1 + 1, p2]
  45. - d[p0 - 1, p1 + 1, p2]
  46. - d[p0 + 1, p1 - 1, p2]
  47. + d[p0 - 1, p1 - 1, p2]
  48. )
  49. h02 = 0.25 * (
  50. d[p0 + 1, p1, p2 + 1]
  51. - d[p0 + 1, p1, p2 - 1]
  52. + d[p0 - 1, p1, p2 - 1]
  53. - d[p0 - 1, p1, p2 + 1]
  54. )
  55. h12 = 0.25 * (
  56. d[p0, p1 + 1, p2 + 1]
  57. - d[p0, p1 + 1, p2 - 1]
  58. + d[p0, p1 - 1, p2 - 1]
  59. - d[p0, p1 - 1, p2 + 1]
  60. )
  61. return (h00, h11, h22, h01, h02, h12)
  62. def _offsets(grad, hess):
  63. """Compute position refinement offsets from gradient and Hessian.
  64. This is equivalent to np.linalg.solve(-H, J) where H is the Hessian
  65. matrix and J is the gradient (Jacobian).
  66. This analytical solution is adapted from (BSD-licensed) C code by
  67. Otero et. al (see SIFT docstring References).
  68. """
  69. h00, h11, h22, h01, h02, h12 = hess
  70. g0, g1, g2 = grad
  71. det = h00 * h11 * h22
  72. det -= h00 * h12 * h12
  73. det -= h01 * h01 * h22
  74. det += 2 * h01 * h02 * h12
  75. det -= h02 * h02 * h11
  76. aa = (h11 * h22 - h12 * h12) / det
  77. ab = (h02 * h12 - h01 * h22) / det
  78. ac = (h01 * h12 - h02 * h11) / det
  79. bb = (h00 * h22 - h02 * h02) / det
  80. bc = (h01 * h02 - h00 * h12) / det
  81. cc = (h00 * h11 - h01 * h01) / det
  82. offset0 = -aa * g0 - ab * g1 - ac * g2
  83. offset1 = -ab * g0 - bb * g1 - bc * g2
  84. offset2 = -ac * g0 - bc * g1 - cc * g2
  85. return np.stack((offset0, offset1, offset2), axis=-1)
  86. class SIFT(FeatureDetector, DescriptorExtractor):
  87. """SIFT feature detection and descriptor extraction.
  88. Parameters
  89. ----------
  90. upsampling : int, optional
  91. Prior to the feature detection the image is upscaled by a factor
  92. of 1 (no upscaling), 2 or 4. Method: Bi-cubic interpolation.
  93. n_octaves : int, optional
  94. Maximum number of octaves. With every octave the image size is
  95. halved and the sigma doubled. The number of octaves will be
  96. reduced as needed to keep at least 12 pixels along each dimension
  97. at the smallest scale.
  98. n_scales : int, optional
  99. Maximum number of scales in every octave.
  100. sigma_min : float, optional
  101. The blur level of the seed image. If upsampling is enabled
  102. sigma_min is scaled by factor 1/upsampling
  103. sigma_in : float, optional
  104. The assumed blur level of the input image.
  105. c_dog : float, optional
  106. Threshold to discard low contrast extrema in the DoG. It's final
  107. value is dependent on n_scales by the relation:
  108. final_c_dog = (2^(1/n_scales)-1) / (2^(1/3)-1) * c_dog
  109. c_edge : float, optional
  110. Threshold to discard extrema that lie in edges. If H is the
  111. Hessian of an extremum, its "edgeness" is described by
  112. tr(H)²/det(H). If the edgeness is higher than
  113. (c_edge + 1)²/c_edge, the extremum is discarded.
  114. n_bins : int, optional
  115. Number of bins in the histogram that describes the gradient
  116. orientations around keypoint.
  117. lambda_ori : float, optional
  118. The window used to find the reference orientation of a keypoint
  119. has a width of 6 * lambda_ori * sigma and is weighted by a
  120. standard deviation of 2 * lambda_ori * sigma.
  121. c_max : float, optional
  122. The threshold at which a secondary peak in the orientation
  123. histogram is accepted as orientation
  124. lambda_descr : float, optional
  125. The window used to define the descriptor of a keypoint has a width
  126. of 2 * lambda_descr * sigma * (n_hist+1)/n_hist and is weighted by
  127. a standard deviation of lambda_descr * sigma.
  128. n_hist : int, optional
  129. The window used to define the descriptor of a keypoint consists of
  130. n_hist * n_hist histograms.
  131. n_ori : int, optional
  132. The number of bins in the histograms of the descriptor patch.
  133. Attributes
  134. ----------
  135. delta_min : float
  136. The sampling distance of the first octave. It's final value is
  137. 1/upsampling.
  138. float_dtype : type
  139. The datatype of the image.
  140. scalespace_sigmas : (n_octaves, n_scales + 3) array
  141. The sigma value of all scales in all octaves.
  142. keypoints : (N, 2) array
  143. Keypoint coordinates as ``(row, col)``.
  144. positions : (N, 2) array
  145. Subpixel-precision keypoint coordinates as ``(row, col)``.
  146. sigmas : (N,) array
  147. The corresponding sigma (blur) value of a keypoint.
  148. scales : (N,) array
  149. The corresponding scale of a keypoint.
  150. orientations : (N,) array
  151. The orientations of the gradient around every keypoint.
  152. octaves : (N,) array
  153. The corresponding octave of a keypoint.
  154. descriptors : (N, n_hist*n_hist*n_ori) array
  155. The descriptors of a keypoint.
  156. Notes
  157. -----
  158. The SIFT algorithm was developed by David Lowe [1]_, [2]_ and later
  159. patented by the University of British Columbia. Since the patent expired in
  160. 2020 it's free to use. The implementation here closely follows the
  161. detailed description in [3]_, including use of the same default parameters.
  162. References
  163. ----------
  164. .. [1] D.G. Lowe. "Object recognition from local scale-invariant
  165. features", Proceedings of the Seventh IEEE International
  166. Conference on Computer Vision, 1999, vol.2, pp. 1150-1157.
  167. :DOI:`10.1109/ICCV.1999.790410`
  168. .. [2] D.G. Lowe. "Distinctive Image Features from Scale-Invariant
  169. Keypoints", International Journal of Computer Vision, 2004,
  170. vol. 60, pp. 91–110.
  171. :DOI:`10.1023/B:VISI.0000029664.99615.94`
  172. .. [3] I. R. Otero and M. Delbracio. "Anatomy of the SIFT Method",
  173. Image Processing On Line, 4 (2014), pp. 370–396.
  174. :DOI:`10.5201/ipol.2014.82`
  175. Examples
  176. --------
  177. >>> from skimage.feature import SIFT, match_descriptors
  178. >>> from skimage.data import camera
  179. >>> from skimage.transform import rotate
  180. >>> img1 = camera()
  181. >>> img2 = rotate(camera(), 90)
  182. >>> detector_extractor1 = SIFT()
  183. >>> detector_extractor2 = SIFT()
  184. >>> detector_extractor1.detect_and_extract(img1)
  185. >>> detector_extractor2.detect_and_extract(img2)
  186. >>> matches = match_descriptors(detector_extractor1.descriptors,
  187. ... detector_extractor2.descriptors,
  188. ... max_ratio=0.6)
  189. >>> matches[10:15]
  190. array([[ 10, 412],
  191. [ 11, 417],
  192. [ 12, 407],
  193. [ 13, 411],
  194. [ 14, 406]])
  195. >>> detector_extractor1.keypoints[matches[10:15, 0]]
  196. array([[ 95, 214],
  197. [ 97, 211],
  198. [ 97, 218],
  199. [102, 215],
  200. [104, 218]])
  201. >>> detector_extractor2.keypoints[matches[10:15, 1]]
  202. array([[297, 95],
  203. [301, 97],
  204. [294, 97],
  205. [297, 102],
  206. [293, 104]])
  207. """
  208. def __init__(
  209. self,
  210. upsampling=2,
  211. n_octaves=8,
  212. n_scales=3,
  213. sigma_min=1.6,
  214. sigma_in=0.5,
  215. c_dog=0.04 / 3,
  216. c_edge=10,
  217. n_bins=36,
  218. lambda_ori=1.5,
  219. c_max=0.8,
  220. lambda_descr=6,
  221. n_hist=4,
  222. n_ori=8,
  223. ):
  224. if upsampling in [1, 2, 4]:
  225. self.upsampling = upsampling
  226. else:
  227. raise ValueError("upsampling must be 1, 2 or 4")
  228. self.n_octaves = n_octaves
  229. self.n_scales = n_scales
  230. self.sigma_min = sigma_min / upsampling
  231. self.sigma_in = sigma_in
  232. self.c_dog = (2 ** (1 / n_scales) - 1) / (2 ** (1 / 3) - 1) * c_dog
  233. self.c_edge = c_edge
  234. self.n_bins = n_bins
  235. self.lambda_ori = lambda_ori
  236. self.c_max = c_max
  237. self.lambda_descr = lambda_descr
  238. self.n_hist = n_hist
  239. self.n_ori = n_ori
  240. self.delta_min = 1 / upsampling
  241. self.float_dtype = None
  242. self.scalespace_sigmas = None
  243. self.keypoints = None
  244. self.positions = None
  245. self.sigmas = None
  246. self.scales = None
  247. self.orientations = None
  248. self.octaves = None
  249. self.descriptors = None
  250. @property
  251. def deltas(self):
  252. """The sampling distances of all octaves"""
  253. deltas = self.delta_min * np.power(
  254. 2, np.arange(self.n_octaves), dtype=self.float_dtype
  255. )
  256. return deltas
  257. def _set_number_of_octaves(self, image_shape):
  258. size_min = 12 # minimum size of last octave
  259. s0 = min(image_shape) * self.upsampling
  260. max_octaves = int(math.log2(s0 / size_min) + 1)
  261. if max_octaves < self.n_octaves:
  262. self.n_octaves = max_octaves
  263. def _create_scalespace(self, image):
  264. """Source: "Anatomy of the SIFT Method" Alg. 1
  265. Construction of the scalespace by gradually blurring (scales) and
  266. downscaling (octaves) the image.
  267. """
  268. scalespace = []
  269. if self.upsampling > 1:
  270. image = rescale(image, self.upsampling, order=1)
  271. # smooth to sigma_min, assuming sigma_in
  272. image = gaussian(
  273. image,
  274. sigma=self.upsampling * math.sqrt(self.sigma_min**2 - self.sigma_in**2),
  275. mode='reflect',
  276. )
  277. # Eq. 10: sigmas.shape = (n_octaves, n_scales + 3).
  278. # The three extra scales are:
  279. # One for the differences needed for DoG and two auxiliary
  280. # images (one at either end) for peak_local_max with exclude
  281. # border = True (see Fig. 5)
  282. # The smoothing doubles after n_scales steps.
  283. tmp = np.power(2, np.arange(self.n_scales + 3) / self.n_scales)
  284. tmp *= self.sigma_min
  285. # all sigmas for the gaussian scalespace
  286. sigmas = self.deltas[:, np.newaxis] / self.deltas[0] * tmp[np.newaxis, :]
  287. self.scalespace_sigmas = sigmas
  288. # Eq. 7: Gaussian smoothing depends on difference with previous sigma
  289. # gaussian_sigmas.shape = (n_octaves, n_scales + 2)
  290. var_diff = np.diff(sigmas * sigmas, axis=1)
  291. gaussian_sigmas = np.sqrt(var_diff) / self.deltas[:, np.newaxis]
  292. # one octave is represented by a 3D image with depth (n_scales+x)
  293. for o in range(self.n_octaves):
  294. # Temporarily put scales axis first so octave[i] is C-contiguous
  295. # (this makes Gaussian filtering faster).
  296. octave = np.empty(
  297. (self.n_scales + 3,) + image.shape, dtype=self.float_dtype, order='C'
  298. )
  299. octave[0] = image
  300. for s in range(1, self.n_scales + 3):
  301. # blur new scale assuming sigma of the last one
  302. gaussian(
  303. octave[s - 1],
  304. sigma=gaussian_sigmas[o, s - 1],
  305. mode='reflect',
  306. out=octave[s],
  307. )
  308. # move scales to last axis as expected by other methods
  309. scalespace.append(np.moveaxis(octave, 0, -1))
  310. if o < self.n_octaves - 1:
  311. # downscale the image by taking every second pixel
  312. image = octave[self.n_scales][::2, ::2]
  313. return scalespace
  314. def _inrange(self, a, dim):
  315. return (
  316. (a[:, 0] > 0)
  317. & (a[:, 0] < dim[0] - 1)
  318. & (a[:, 1] > 0)
  319. & (a[:, 1] < dim[1] - 1)
  320. )
  321. def _find_localize_evaluate(self, dogspace, img_shape):
  322. """Source: "Anatomy of the SIFT Method" Alg. 4-9
  323. 1) first find all extrema of a (3, 3, 3) neighborhood
  324. 2) use second order Taylor development to refine the positions to
  325. sub-pixel precision
  326. 3) filter out extrema that have low contrast and lie on edges or close
  327. to the image borders
  328. """
  329. extrema_pos = []
  330. extrema_scales = []
  331. extrema_sigmas = []
  332. threshold = self.c_dog * 0.8
  333. for o, (octave, delta) in enumerate(zip(dogspace, self.deltas)):
  334. # find extrema
  335. keys = _local_max(np.ascontiguousarray(octave), threshold)
  336. if keys.size == 0:
  337. extrema_pos.append(np.empty((0, 2)))
  338. continue
  339. # localize extrema
  340. oshape = octave.shape
  341. refinement_iterations = 5
  342. offset_max = 0.6
  343. for i in range(refinement_iterations):
  344. if i > 0:
  345. # exclude any keys that have moved out of bounds
  346. keys = keys[self._inrange(keys, oshape), :]
  347. # Jacobian and Hessian of all extrema
  348. grad = _sparse_gradient(octave, keys)
  349. hess = _hessian(octave, keys)
  350. # solve for offset of the extremum
  351. off = _offsets(grad, hess)
  352. if i == refinement_iterations - 1:
  353. break
  354. # offset is too big and an increase would not bring us out of
  355. # bounds
  356. wrong_position_pos = np.logical_and(
  357. off > offset_max, keys + 1 < tuple([a - 1 for a in oshape])
  358. )
  359. wrong_position_neg = np.logical_and(off < -offset_max, keys - 1 > 0)
  360. if not np.any(np.logical_or(wrong_position_neg, wrong_position_pos)):
  361. break
  362. keys[wrong_position_pos] += 1
  363. keys[wrong_position_neg] -= 1
  364. # mask for all extrema that have been localized successfully
  365. finished = np.all(np.abs(off) < offset_max, axis=1)
  366. keys = keys[finished]
  367. off = off[finished]
  368. grad = [g[finished] for g in grad]
  369. # value of extremum in octave
  370. vals = octave[keys[:, 0], keys[:, 1], keys[:, 2]]
  371. # values at interpolated point
  372. w = vals
  373. for i in range(3):
  374. w += 0.5 * grad[i] * off[:, i]
  375. h00, h11, h01 = hess[0][finished], hess[1][finished], hess[3][finished]
  376. sigmaratio = self.scalespace_sigmas[0, 1] / self.scalespace_sigmas[0, 0]
  377. # filter for contrast, edgeness and borders
  378. contrast_threshold = self.c_dog
  379. contrast_filter = np.abs(w) > contrast_threshold
  380. edge_threshold = np.square(self.c_edge + 1) / self.c_edge
  381. edge_response = _edgeness(
  382. h00[contrast_filter], h11[contrast_filter], h01[contrast_filter]
  383. )
  384. edge_filter = np.abs(edge_response) <= edge_threshold
  385. keys = keys[contrast_filter][edge_filter]
  386. off = off[contrast_filter][edge_filter]
  387. yx = ((keys[:, :2] + off[:, :2]) * delta).astype(self.float_dtype)
  388. sigmas = self.scalespace_sigmas[o, keys[:, 2]] * np.power(
  389. sigmaratio, off[:, 2]
  390. )
  391. border_filter = np.all(
  392. np.logical_and(
  393. (yx - sigmas[:, np.newaxis]) > 0.0,
  394. (yx + sigmas[:, np.newaxis]) < img_shape,
  395. ),
  396. axis=1,
  397. )
  398. extrema_pos.append(yx[border_filter])
  399. extrema_scales.append(keys[border_filter, 2])
  400. extrema_sigmas.append(sigmas[border_filter])
  401. octave_indices = np.concatenate(
  402. [np.full(len(p), i) for i, p in enumerate(extrema_pos)]
  403. )
  404. if len(octave_indices) == 0:
  405. raise RuntimeError(
  406. "SIFT found no features. Try passing in an image containing "
  407. "greater intensity contrasts between adjacent pixels."
  408. )
  409. extrema_pos = np.concatenate(extrema_pos)
  410. extrema_scales = np.concatenate(extrema_scales)
  411. extrema_sigmas = np.concatenate(extrema_sigmas)
  412. return extrema_pos, extrema_scales, extrema_sigmas, octave_indices
  413. def _fit(self, h):
  414. """Refine the position of the peak by fitting it to a parabola"""
  415. return (h[0] - h[2]) / (2 * (h[0] + h[2] - 2 * h[1]))
  416. def _compute_orientation(
  417. self, positions_oct, scales_oct, sigmas_oct, octaves, gaussian_scalespace
  418. ):
  419. """Source: "Anatomy of the SIFT Method" Alg. 11
  420. Calculates the orientation of the gradient around every keypoint
  421. """
  422. gradient_space = []
  423. # list for keypoints that have more than one reference orientation
  424. keypoint_indices = []
  425. keypoint_angles = []
  426. keypoint_octave = []
  427. orientations = np.zeros_like(sigmas_oct, dtype=self.float_dtype)
  428. key_count = 0
  429. for o, (octave, delta) in enumerate(zip(gaussian_scalespace, self.deltas)):
  430. gradient_space.append(np.gradient(octave))
  431. in_oct = octaves == o
  432. if not np.any(in_oct):
  433. continue
  434. positions = positions_oct[in_oct]
  435. scales = scales_oct[in_oct]
  436. sigmas = sigmas_oct[in_oct]
  437. oshape = octave.shape[:2]
  438. # convert to octave's dimensions
  439. yx = positions / delta
  440. sigma = sigmas / delta
  441. # dimensions of the patch
  442. radius = 3 * self.lambda_ori * sigma
  443. p_min = np.maximum(0, yx - radius[:, np.newaxis] + 0.5).astype(int)
  444. p_max = np.minimum(
  445. yx + radius[:, np.newaxis] + 0.5, (oshape[0] - 1, oshape[1] - 1)
  446. ).astype(int)
  447. # orientation histogram
  448. hist = np.empty(self.n_bins, dtype=self.float_dtype)
  449. avg_kernel = np.full((3,), 1 / 3, dtype=self.float_dtype)
  450. for k in range(len(yx)):
  451. hist[:] = 0
  452. # use the patch coordinates to get the gradient and then
  453. # normalize them
  454. r, c = np.meshgrid(
  455. np.arange(p_min[k, 0], p_max[k, 0] + 1),
  456. np.arange(p_min[k, 1], p_max[k, 1] + 1),
  457. indexing='ij',
  458. sparse=True,
  459. )
  460. gradient_row = gradient_space[o][0][r, c, scales[k]]
  461. gradient_col = gradient_space[o][1][r, c, scales[k]]
  462. r = r.astype(self.float_dtype, copy=False)
  463. c = c.astype(self.float_dtype, copy=False)
  464. r -= yx[k, 0]
  465. c -= yx[k, 1]
  466. # gradient magnitude and angles
  467. magnitude = np.sqrt(np.square(gradient_row) + np.square(gradient_col))
  468. theta = np.mod(np.arctan2(gradient_col, gradient_row), 2 * np.pi)
  469. # more weight to center values
  470. kernel = np.exp(
  471. np.divide(r * r + c * c, -2 * (self.lambda_ori * sigma[k]) ** 2)
  472. )
  473. # fill the histogram
  474. bins = np.floor(
  475. (theta / (2 * np.pi) * self.n_bins + 0.5) % self.n_bins
  476. ).astype(int)
  477. np.add.at(hist, bins, kernel * magnitude)
  478. # smooth the histogram and find the maximum
  479. hist = np.concatenate((hist[-6:], hist, hist[:6]))
  480. for _ in range(6): # number of smoothings
  481. hist = np.convolve(hist, avg_kernel, mode='same')
  482. hist = hist[6:-6]
  483. max_filter = ndi.maximum_filter(hist, [3], mode='wrap')
  484. # if an angle is in 80% percent range of the maximum, a
  485. # new keypoint is created for it
  486. maxima = np.nonzero(
  487. np.logical_and(
  488. hist >= (self.c_max * np.max(hist)), max_filter == hist
  489. )
  490. )
  491. # save the angles
  492. for c, m in enumerate(maxima[0]):
  493. neigh = np.arange(m - 1, m + 2) % len(hist)
  494. # use neighbors to fit a parabola, to get more accurate
  495. # result
  496. ori = (m + self._fit(hist[neigh]) + 0.5) * 2 * np.pi / self.n_bins
  497. if ori > np.pi:
  498. ori -= 2 * np.pi
  499. if c == 0:
  500. orientations[key_count] = ori
  501. else:
  502. keypoint_indices.append(key_count)
  503. keypoint_angles.append(ori)
  504. keypoint_octave.append(o)
  505. key_count += 1
  506. self.positions = np.concatenate(
  507. (positions_oct, positions_oct[keypoint_indices])
  508. )
  509. self.scales = np.concatenate((scales_oct, scales_oct[keypoint_indices]))
  510. self.sigmas = np.concatenate((sigmas_oct, sigmas_oct[keypoint_indices]))
  511. self.orientations = np.concatenate((orientations, keypoint_angles))
  512. self.octaves = np.concatenate((octaves, keypoint_octave))
  513. # return the gradient_space to reuse it to find the descriptor
  514. return gradient_space
  515. def _rotate(self, row, col, angle):
  516. c = math.cos(angle)
  517. s = math.sin(angle)
  518. rot_row = c * row + s * col
  519. rot_col = -s * row + c * col
  520. return rot_row, rot_col
  521. def _compute_descriptor(self, gradient_space):
  522. """Source: "Anatomy of the SIFT Method" Alg. 12
  523. Calculates the descriptor for every keypoint
  524. """
  525. n_key = len(self.scales)
  526. self.descriptors = np.empty(
  527. (n_key, self.n_hist**2 * self.n_ori), dtype=np.uint8
  528. )
  529. # indices of the histograms
  530. hists = np.arange(1, self.n_hist + 1, dtype=self.float_dtype)
  531. # indices of the bins
  532. bins = np.arange(1, self.n_ori + 1, dtype=self.float_dtype)
  533. key_numbers = np.arange(n_key)
  534. for o, (gradient, delta) in enumerate(zip(gradient_space, self.deltas)):
  535. in_oct = self.octaves == o
  536. if not np.any(in_oct):
  537. continue
  538. positions = self.positions[in_oct]
  539. scales = self.scales[in_oct]
  540. sigmas = self.sigmas[in_oct]
  541. orientations = self.orientations[in_oct]
  542. numbers = key_numbers[in_oct]
  543. dim = gradient[0].shape[:2]
  544. center_pos = positions / delta
  545. sigma = sigmas / delta
  546. # dimensions of the patch
  547. radius = self.lambda_descr * (1 + 1 / self.n_hist) * sigma
  548. radius_patch = math.sqrt(2) * radius
  549. p_min = np.asarray(
  550. np.maximum(0, center_pos - radius_patch[:, np.newaxis] + 0.5), dtype=int
  551. )
  552. p_max = np.asarray(
  553. np.minimum(
  554. center_pos + radius_patch[:, np.newaxis] + 0.5,
  555. (dim[0] - 1, dim[1] - 1),
  556. ),
  557. dtype=int,
  558. )
  559. for k in range(len(p_max)):
  560. rad_k = float(radius[k])
  561. ori = float(orientations[k])
  562. histograms = np.zeros(
  563. (self.n_hist, self.n_hist, self.n_ori), dtype=self.float_dtype
  564. )
  565. # the patch
  566. r, c = np.meshgrid(
  567. np.arange(p_min[k, 0], p_max[k, 0]),
  568. np.arange(p_min[k, 1], p_max[k, 1]),
  569. indexing='ij',
  570. sparse=True,
  571. )
  572. # normalized coordinates
  573. r_norm = np.subtract(r, center_pos[k, 0], dtype=self.float_dtype)
  574. c_norm = np.subtract(c, center_pos[k, 1], dtype=self.float_dtype)
  575. r_norm, c_norm = self._rotate(r_norm, c_norm, ori)
  576. # select coordinates and gradient values within the patch
  577. inside = np.maximum(np.abs(r_norm), np.abs(c_norm)) < rad_k
  578. r_norm, c_norm = r_norm[inside], c_norm[inside]
  579. r_idx, c_idx = np.nonzero(inside)
  580. r = r[r_idx, 0]
  581. c = c[0, c_idx]
  582. gradient_row = gradient[0][r, c, scales[k]]
  583. gradient_col = gradient[1][r, c, scales[k]]
  584. # compute the (relative) gradient orientation
  585. theta = np.arctan2(gradient_col, gradient_row) - ori
  586. lam_sig = self.lambda_descr * float(sigma[k])
  587. # Gaussian weighted kernel magnitude
  588. kernel = np.exp((r_norm * r_norm + c_norm * c_norm) / (-2 * lam_sig**2))
  589. magnitude = (
  590. np.sqrt(gradient_row * gradient_row + gradient_col * gradient_col)
  591. * kernel
  592. )
  593. lam_sig_ratio = 2 * lam_sig / self.n_hist
  594. rc_bins = (hists - (1 + self.n_hist) / 2) * lam_sig_ratio
  595. rc_bin_spacing = lam_sig_ratio
  596. ori_bins = (2 * np.pi * bins) / self.n_ori
  597. # distances to the histograms and bins
  598. dist_r = np.abs(np.subtract.outer(rc_bins, r_norm))
  599. dist_c = np.abs(np.subtract.outer(rc_bins, c_norm))
  600. # the orientation histograms/bins that get the contribution
  601. near_t, near_t_val = _ori_distances(ori_bins, theta)
  602. # create the histogram
  603. _update_histogram(
  604. histograms,
  605. near_t,
  606. near_t_val,
  607. magnitude,
  608. dist_r,
  609. dist_c,
  610. rc_bin_spacing,
  611. )
  612. # convert the histograms to a 1d descriptor
  613. histograms = histograms.reshape(-1)
  614. # saturate the descriptor
  615. histograms = np.minimum(histograms, 0.2 * np.linalg.norm(histograms))
  616. # normalize the descriptor
  617. descriptor = (512 * histograms) / np.linalg.norm(histograms)
  618. # quantize the descriptor
  619. descriptor = np.minimum(np.floor(descriptor), 255)
  620. self.descriptors[numbers[k], :] = descriptor
  621. def _preprocess(self, image):
  622. check_nD(image, 2)
  623. image = img_as_float(image)
  624. self.float_dtype = _supported_float_type(image.dtype)
  625. image = image.astype(self.float_dtype, copy=False)
  626. self._set_number_of_octaves(image.shape)
  627. return image
  628. def detect(self, image):
  629. """Detect the keypoints.
  630. Parameters
  631. ----------
  632. image : 2D array
  633. Input image.
  634. """
  635. image = self._preprocess(image)
  636. gaussian_scalespace = self._create_scalespace(image)
  637. dog_scalespace = [np.diff(layer, axis=2) for layer in gaussian_scalespace]
  638. positions, scales, sigmas, octaves = self._find_localize_evaluate(
  639. dog_scalespace, image.shape
  640. )
  641. self._compute_orientation(
  642. positions, scales, sigmas, octaves, gaussian_scalespace
  643. )
  644. self.keypoints = self.positions.round().astype(int)
  645. def extract(self, image):
  646. """Extract the descriptors for all keypoints in the image.
  647. Parameters
  648. ----------
  649. image : 2D array
  650. Input image.
  651. """
  652. image = self._preprocess(image)
  653. gaussian_scalespace = self._create_scalespace(image)
  654. gradient_space = [np.gradient(octave) for octave in gaussian_scalespace]
  655. self._compute_descriptor(gradient_space)
  656. def detect_and_extract(self, image):
  657. """Detect the keypoints and extract their descriptors.
  658. Parameters
  659. ----------
  660. image : 2D array
  661. Input image.
  662. """
  663. image = self._preprocess(image)
  664. gaussian_scalespace = self._create_scalespace(image)
  665. dog_scalespace = [np.diff(layer, axis=2) for layer in gaussian_scalespace]
  666. positions, scales, sigmas, octaves = self._find_localize_evaluate(
  667. dog_scalespace, image.shape
  668. )
  669. gradient_space = self._compute_orientation(
  670. positions, scales, sigmas, octaves, gaussian_scalespace
  671. )
  672. self._compute_descriptor(gradient_space)
  673. self.keypoints = self.positions.round().astype(int)