| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341 |
- import numpy as np
- from . import _hoghistogram
- from .._shared import utils
def _hog_normalize_block(block, method, eps=1e-5):
    """Normalize one block of cell histograms with the requested scheme.

    Parameters
    ----------
    block : ndarray
        Histogram values belonging to a single block.
    method : {'L1', 'L1-sqrt', 'L2', 'L2-Hys'}
        Name of the normalization scheme.
    eps : float, optional
        Small constant added to the norm in the denominator (it is squared
        and added under the square root for the L2 variants).

    Returns
    -------
    out : ndarray
        The normalized block.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """
    if method in ('L1', 'L1-sqrt'):
        # Both L1 flavours share the same scaling; 'L1-sqrt' additionally
        # takes the element-wise square root of the scaled values.
        scaled = block / (np.sum(np.abs(block)) + eps)
        return np.sqrt(scaled) if method == 'L1-sqrt' else scaled

    if method in ('L2', 'L2-Hys'):
        scaled = block / np.sqrt(np.sum(block**2) + eps**2)
        if method == 'L2':
            return scaled
        # 'L2-Hys': cap the L2-normalized values at 0.2, then renormalize.
        capped = np.minimum(scaled, 0.2)
        return capped / np.sqrt(np.sum(capped**2) + eps**2)

    raise ValueError('Selected block normalization method is invalid.')
def _hog_channel_gradient(channel):
    """Compute unnormalized central differences along the row and column axes.

    Parameters
    ----------
    channel : (M, N) ndarray
        Grayscale image or a single channel of an image.

    Returns
    -------
    g_row, g_col : (M, N) ndarray
        Differences ``channel[i + 1] - channel[i - 1]`` taken along the row
        axis and the column axis respectively. Both arrays have the shape and
        dtype of `channel`. The first and last rows of `g_row` and the first
        and last columns of `g_col` are zero.
    """
    shape, dtype = channel.shape, channel.dtype

    # Column-axis gradient: the two outermost columns lack a neighbour on one
    # side, so they are set to zero and only the interior is differenced.
    grad_c = np.empty(shape, dtype=dtype)
    grad_c[:, [0, -1]] = 0
    grad_c[:, 1:-1] = channel[:, 2:] - channel[:, :-2]

    # Row-axis gradient, handled the same way for the outermost rows.
    grad_r = np.empty(shape, dtype=dtype)
    grad_r[[0, -1], :] = 0
    grad_r[1:-1, :] = channel[2:, :] - channel[:-2, :]

    return grad_r, grad_c
@utils.channel_as_last_axis(multichannel_output=False)
def hog(
    image,
    orientations=9,
    pixels_per_cell=(8, 8),
    cells_per_block=(3, 3),
    block_norm='L2-Hys',
    visualize=False,
    transform_sqrt=False,
    feature_vector=True,
    *,
    channel_axis=None,
):
    """Extract Histogram of Oriented Gradients (HOG) for a given image.

    Compute a Histogram of Oriented Gradients (HOG) by

        1. (optional) global image normalization
        2. computing the gradient image in `row` and `col`
        3. computing gradient histograms
        4. normalizing across blocks
        5. flattening into a feature vector

    Parameters
    ----------
    image : (M, N[, C]) ndarray
        Input image.
    orientations : int, optional
        Number of orientation bins.
    pixels_per_cell : 2-tuple (int, int), optional
        Size (in pixels) of a cell.
    cells_per_block : 2-tuple (int, int), optional
        Number of cells in each block.
    block_norm : str {'L1', 'L1-sqrt', 'L2', 'L2-Hys'}, optional
        Block normalization method:

        ``L1``
           Normalization using L1-norm.
        ``L1-sqrt``
           Normalization using L1-norm, followed by square root.
        ``L2``
           Normalization using L2-norm.
        ``L2-Hys``
           Normalization using L2-norm, followed by limiting the
           maximum values to 0.2 (`Hys` stands for `hysteresis`) and
           renormalization using L2-norm. (default)

        For details, see [3]_, [4]_.

    visualize : bool, optional
        Also return an image of the HOG.  For each cell and orientation bin,
        the image contains a line segment that is centered at the cell center,
        is perpendicular to the midpoint of the range of angles spanned by the
        orientation bin, and has intensity proportional to the corresponding
        histogram value.
    transform_sqrt : bool, optional
        Apply power law compression to normalize the image before
        processing. DO NOT use this if the image contains negative
        values. Also see `notes` section below.
    feature_vector : bool, optional
        Return the data as a feature vector by calling .ravel() on the result
        just before returning.
    channel_axis : int or None, optional
        If None, the image is assumed to be a grayscale (single channel) image.
        Otherwise, this parameter indicates which axis of the array corresponds
        to channels.

        .. versionadded:: 0.19
           `channel_axis` was added in 0.19.

    Returns
    -------
    out : (n_blocks_row, n_blocks_col, n_cells_row, n_cells_col, n_orient) ndarray
        HOG descriptor for the image. If `feature_vector` is True, a 1D
        (flattened) array is returned.
    hog_image : (M, N) ndarray, optional
        A visualisation of the HOG image. Only provided if `visualize` is True.

    Raises
    ------
    ValueError
        If the image does not have exactly two spatial dimensions, if the
        image is too small given the values of pixels_per_cell and
        cells_per_block, or if `block_norm` is not a supported method.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Histogram_of_oriented_gradients

    .. [2] Dalal, N and Triggs, B, Histograms of Oriented Gradients for
           Human Detection, IEEE Computer Society Conference on Computer
           Vision and Pattern Recognition 2005 San Diego, CA, USA,
           https://lear.inrialpes.fr/people/triggs/pubs/Dalal-cvpr05.pdf,
           :DOI:`10.1109/CVPR.2005.177`

    .. [3] Lowe, D.G., Distinctive image features from scale-invariant
           keypoints, International Journal of Computer Vision (2004) 60: 91,
           http://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf,
           :DOI:`10.1023/B:VISI.0000029664.99615.94`

    .. [4] Dalal, N, Finding People in Images and Videos,
           Human-Computer Interaction [cs.HC], Institut National Polytechnique
           de Grenoble - INPG, 2006,
           https://tel.archives-ouvertes.fr/tel-00390303/file/NavneetDalalThesis.pdf

    Notes
    -----
    The presented code implements the HOG extraction method from [2]_ with
    the following changes: (I) blocks of (3, 3) cells are used by default
    ((2, 2) in the paper); (II) no smoothing within cells (Gaussian spatial
    window with sigma=8pix in the paper). The default block normalization,
    ``L2-Hys``, is the one used in the paper; other schemes can be selected
    with `block_norm`.

    Power law compression, also known as Gamma correction, is used to reduce
    the effects of shadowing and illumination variations. The compression makes
    the dark regions lighter. When the kwarg `transform_sqrt` is set to
    ``True``, the function computes the square root of each color channel
    and then applies the hog algorithm to the image.
    """
    image = np.atleast_2d(image)
    float_dtype = utils._supported_float_type(image.dtype)
    image = image.astype(float_dtype, copy=False)

    multichannel = channel_axis is not None
    ndim_spatial = image.ndim - 1 if multichannel else image.ndim
    if ndim_spatial != 2:
        raise ValueError(
            'Only images with two spatial dimensions are '
            'supported. If using with color/multichannel '
            'images, specify `channel_axis`.'
        )

    # Work out the cell/block grid up front so that an image that cannot hold
    # even a single block is rejected before any gradients, histograms or
    # visualization are computed.
    s_row, s_col = image.shape[:2]
    c_row, c_col = pixels_per_cell
    b_row, b_col = cells_per_block

    n_cells_row = int(s_row // c_row)  # number of cells along row-axis
    n_cells_col = int(s_col // c_col)  # number of cells along col-axis

    # Blocks slide over the cell grid one cell at a time.
    n_blocks_row = (n_cells_row - b_row) + 1
    n_blocks_col = (n_cells_col - b_col) + 1
    if n_blocks_col <= 0 or n_blocks_row <= 0:
        min_row = b_row * c_row
        min_col = b_col * c_col
        raise ValueError(
            'The input image is too small given the values of '
            'pixels_per_cell and cells_per_block. '
            'It should have at least: '
            f'{min_row} rows and {min_col} cols.'
        )

    # Stage 1: optional global image normalization, designed to reduce the
    # influence of illumination effects. Gamma (power law) compression is
    # used; only the square-root variant is implemented here. Image texture
    # strength is typically proportional to the local surface illumination,
    # so this compression helps to reduce the effects of local shadowing and
    # illumination variations.
    if transform_sqrt:
        image = np.sqrt(image)

    # Stage 2: first order image gradients. These capture contour, silhouette
    # and some texture information, while providing further resistance to
    # illumination variations. The locally dominant color channel is used,
    # which provides color invariance to a large extent. Variant methods may
    # also include second order image derivatives, which act as primitive bar
    # detectors - a useful feature for capturing, e.g. bar like structures in
    # bicycles and limbs in humans.
    if multichannel:
        g_row_by_ch = np.empty_like(image, dtype=float_dtype)
        g_col_by_ch = np.empty_like(image, dtype=float_dtype)
        g_magn = np.empty_like(image, dtype=float_dtype)

        for idx_ch in range(image.shape[2]):
            (
                g_row_by_ch[:, :, idx_ch],
                g_col_by_ch[:, :, idx_ch],
            ) = _hog_channel_gradient(image[:, :, idx_ch])
            g_magn[:, :, idx_ch] = np.hypot(
                g_row_by_ch[:, :, idx_ch], g_col_by_ch[:, :, idx_ch]
            )

        # For each pixel select the channel with the highest gradient magnitude
        idcs_max = g_magn.argmax(axis=2)
        rr, cc = np.meshgrid(
            np.arange(image.shape[0]),
            np.arange(image.shape[1]),
            indexing='ij',
            sparse=True,
        )
        g_row = g_row_by_ch[rr, cc, idcs_max]
        g_col = g_col_by_ch[rr, cc, idcs_max]
    else:
        g_row, g_col = _hog_channel_gradient(image)

    # Stage 3: produce an encoding that is sensitive to local image content
    # while remaining resistant to small changes in pose or appearance. The
    # adopted method pools gradient orientation information locally in the
    # same way as the SIFT [Lowe 2004] feature. The image window is divided
    # into small spatial regions, called "cells". For each cell we accumulate
    # a local 1-D histogram of gradient or edge orientations over all the
    # pixels in the cell. This combined cell-level 1-D histogram forms the
    # basic "orientation histogram" representation. Each orientation
    # histogram divides the gradient angle range into a fixed number of
    # predetermined bins. The gradient magnitudes of the pixels in the cell
    # are used to vote into the orientation histogram.
    orientation_histogram = np.zeros(
        (n_cells_row, n_cells_col, orientations), dtype=float
    )
    # The gradients are handed over as Python `float` (double precision)
    # arrays, matching the dtype of `orientation_histogram`.
    g_row = g_row.astype(float, copy=False)
    g_col = g_col.astype(float, copy=False)

    # The helper's return value is not used: `orientation_histogram` is
    # expected to be filled in place with the per-cell histograms.
    _hoghistogram.hog_histograms(
        g_col,
        g_row,
        c_col,
        c_row,
        s_col,
        s_row,
        n_cells_col,
        n_cells_row,
        orientations,
        orientation_histogram,
    )

    # Optional visualization: draw, for every cell and orientation bin, a
    # line segment through the cell centre weighted by the histogram value.
    hog_image = None

    if visualize:
        from .. import draw

        radius = min(c_row, c_col) // 2 - 1
        orientations_arr = np.arange(orientations)
        # set dr_arr, dc_arr to correspond to midpoints of orientation bins
        orientation_bin_midpoints = np.pi * (orientations_arr + 0.5) / orientations
        dr_arr = radius * np.sin(orientation_bin_midpoints)
        dc_arr = radius * np.cos(orientation_bin_midpoints)
        hog_image = np.zeros((s_row, s_col), dtype=float_dtype)
        for r in range(n_cells_row):
            centre_row = r * c_row + c_row // 2
            for c in range(n_cells_col):
                centre_col = c * c_col + c_col // 2
                for o, dr, dc in zip(orientations_arr, dr_arr, dc_arr):
                    rr, cc = draw.line(
                        int(centre_row - dc),
                        int(centre_col + dr),
                        int(centre_row + dc),
                        int(centre_col - dr),
                    )
                    hog_image[rr, cc] += orientation_histogram[r, c, o]

    # Stage 4: normalization, which takes local groups of cells and contrast
    # normalizes their overall responses before passing to next stage.
    # Normalization introduces better invariance to illumination, shadowing,
    # and edge contrast. It is performed by accumulating a measure of local
    # histogram "energy" over local groups of cells that we call "blocks".
    # The result is used to normalize each cell in the block. Typically each
    # individual cell is shared between several blocks, but its
    # normalizations are block dependent and thus different. The cell thus
    # appears several times in the final output vector with different
    # normalizations. This may seem redundant but it improves the
    # performance. We refer to the normalized block descriptors as Histogram
    # of Oriented Gradient (HOG) descriptors.
    normalized_blocks = np.zeros(
        (n_blocks_row, n_blocks_col, b_row, b_col, orientations), dtype=float_dtype
    )

    for r in range(n_blocks_row):
        for c in range(n_blocks_col):
            block = orientation_histogram[r : r + b_row, c : c + b_col, :]
            normalized_blocks[r, c, :] = _hog_normalize_block(block, method=block_norm)

    # Stage 5: collect the HOG descriptors from all blocks of a dense
    # overlapping grid of blocks covering the detection window into a
    # combined feature vector for use in the window classifier.
    if feature_vector:
        normalized_blocks = normalized_blocks.ravel()

    if visualize:
        return normalized_blocks, hog_image
    return normalized_blocks
|