collections.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
"""Augmenters that are collections of other augmenters.

List of augmenters:

    * :class:`RandAugment`

Added in 0.4.0.

"""
  6. from __future__ import print_function, division, absolute_import
  7. import numpy as np
  8. from .. import parameters as iap
  9. from .. import random as iarandom
  10. from . import meta
  11. from . import arithmetic
  12. from . import flip
  13. from . import pillike
  14. from . import size as sizelib
  15. class RandAugment(meta.Sequential):
  16. """Apply RandAugment to inputs as described in the corresponding paper.
  17. See paper::
  18. Cubuk et al.
  19. RandAugment: Practical automated data augmentation with a reduced
  20. search space
  21. .. note::
  22. The paper contains essentially no hyperparameters for the individual
  23. augmentation techniques. The hyperparameters used here come mostly
  24. from the official code repository, which however seems to only contain
  25. code for CIFAR10 and SVHN, not for ImageNet. So some guesswork was
  26. involved and a few of the hyperparameters were also taken from
  27. https://github.com/ildoonet/pytorch-randaugment/blob/master/RandAugment/augmentations.py .
  28. This implementation deviates from the code repository for all PIL
  29. enhance operations. In the repository these use a factor of
  30. ``0.1 + M*1.8/M_max``, which would lead to a factor of ``0.1`` for the
  31. weakest ``M`` of ``M=0``. For e.g. ``Brightness`` that would result in
  32. a basically black image. This definition is fine for AutoAugment (from
  33. where the code and hyperparameters are copied), which optimizes
  34. each transformation's ``M`` individually, but not for RandAugment,
  35. which uses a single fixed ``M``. We hence redefine these
  36. hyperparameters to ``1.0 + S * M * 0.9/M_max``, where ``S`` is
  37. randomly either ``1`` or ``-1``.
  38. We also note that it is not entirely clear which transformations
  39. were used in the ImageNet experiments. The paper lists some
  40. transformations in Figure 2, but names others in the text too (e.g.
  41. crops, flips, cutout). While Figure 2 lists the Identity function,
  42. this transformation seems to not appear in the repository (and in fact,
  43. the function ``randaugment(N, M)`` doesn't seem to exist in the
  44. repository either). So we also make a best guess here about what
  45. transformations might have been used.
  46. .. warning::
  47. This augmenter only works with image data, not e.g. bounding boxes.
  48. The used PIL-based affine transformations are not yet able to
  49. process non-image data. (This augmenter uses PIL-based affine
  50. transformations to ensure that outputs are as similar as possible
  51. to the paper's implementation.)
  52. Added in 0.4.0.
  53. **Supported dtypes**:
  54. minimum of (
  55. :class:`~imgaug.augmenters.flip.Fliplr`,
  56. :class:`~imgaug.augmenters.size.KeepSizeByResize`,
  57. :class:`~imgaug.augmenters.size.Crop`,
  58. :class:`~imgaug.augmenters.meta.Sequential`,
  59. :class:`~imgaug.augmenters.meta.SomeOf`,
  60. :class:`~imgaug.augmenters.meta.Identity`,
  61. :class:`~imgaug.augmenters.pillike.Autocontrast`,
  62. :class:`~imgaug.augmenters.pillike.Equalize`,
  63. :class:`~imgaug.augmenters.arithmetic.Invert`,
  64. :class:`~imgaug.augmenters.pillike.Affine`,
  65. :class:`~imgaug.augmenters.pillike.Posterize`,
  66. :class:`~imgaug.augmenters.pillike.Solarize`,
  67. :class:`~imgaug.augmenters.pillike.EnhanceColor`,
  68. :class:`~imgaug.augmenters.pillike.EnhanceContrast`,
  69. :class:`~imgaug.augmenters.pillike.EnhanceBrightness`,
  70. :class:`~imgaug.augmenters.pillike.EnhanceSharpness`,
  71. :class:`~imgaug.augmenters.arithmetic.Cutout`,
  72. :class:`~imgaug.augmenters.pillike.FilterBlur`,
  73. :class:`~imgaug.augmenters.pillike.FilterSmooth`
  74. )
  75. Parameters
  76. ----------
  77. n : int or tuple of int or list of int or imgaug.parameters.StochasticParameter or None, optional
  78. Parameter ``N`` in the paper, i.e. number of transformations to apply.
  79. The paper suggests ``N=2`` for ImageNet.
  80. See also parameter ``n`` in :class:`~imgaug.augmenters.meta.SomeOf`
  81. for more details.
  82. Note that horizontal flips (p=50%) and crops are always applied. This
  83. parameter only determines how many of the other transformations
  84. are applied per image.
  85. m : int or tuple of int or list of int or imgaug.parameters.StochasticParameter or None, optional
  86. Parameter ``M`` in the paper, i.e. magnitude/severity/strength of the
  87. applied transformations in interval ``[0 .. 30]`` with ``M=0`` being
  88. the weakest. The paper suggests for ImageNet ``M=9`` in case of
  89. ResNet-50 and ``M=28`` in case of EfficientNet-B7.
  90. This implementation uses a default value of ``(6, 12)``, i.e. the
  91. value is uniformly sampled per image from the interval ``[6 .. 12]``.
  92. This ensures greater diversity of transformations than using a single
  93. fixed value.
  94. * If ``int``: That value will always be used.
  95. * If ``tuple`` ``(a, b)``: A random value will be uniformly sampled per
  96. image from the discrete interval ``[a .. b]``.
  97. * If ``list``: A random value will be picked from the list per image.
  98. * If ``StochasticParameter``: For ``B`` images in a batch, ``B`` values
  99. will be sampled per augmenter (provided the augmenter is dependent
  100. on the magnitude).
  101. cval : number or tuple of number or list of number or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
  102. The constant value to use when filling in newly created pixels.
  103. See parameter `fillcolor` in
  104. :class:`~imgaug.augmenters.pillike.Affine` for details.
  105. The paper's repository uses an RGB value of ``125, 122, 113``.
  106. This implementation uses a single intensity value of ``128``, which
  107. should work better for cases where input images don't have exactly
  108. ``3`` channels or come from a different dataset than used by the
  109. paper.
  110. seed : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
  111. See :func:`~imgaug.augmenters.meta.Augmenter.__init__`.
  112. name : None or str, optional
  113. See :func:`~imgaug.augmenters.meta.Augmenter.__init__`.
  114. random_state : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
  115. Old name for parameter `seed`.
  116. Its usage will not yet cause a deprecation warning,
  117. but it is still recommended to use `seed` now.
  118. Outdated since 0.4.0.
  119. deterministic : bool, optional
  120. Deprecated since 0.4.0.
  121. See method ``to_deterministic()`` for an alternative and for
  122. details about what the "deterministic mode" actually does.
  123. Examples
  124. --------
  125. >>> import imgaug.augmenters as iaa
  126. >>> aug = iaa.RandAugment(n=2, m=9)
  127. Create a RandAugment augmenter similar to the suggested hyperparameters
  128. in the paper.
  129. >>> aug = iaa.RandAugment(m=30)
  130. Create a RandAugment augmenter with maximum magnitude/strength.
  131. >>> aug = iaa.RandAugment(m=(0, 9))
  132. Create a RandAugment augmenter that applies its transformations with a
  133. random magnitude between ``0`` (very weak) and ``9`` (recommended for
  134. ImageNet and ResNet-50). ``m`` is sampled per transformation.
  135. >>> aug = iaa.RandAugment(n=(0, 3))
  136. Create a RandAugment augmenter that applies ``0`` to ``3`` of its
  137. child transformations to images. Horizontal flips (p=50%) and crops are
  138. always applied.
  139. """
  140. _M_MAX = 30
  141. # according to paper:
  142. # N=2, M=9 is optimal for ImageNet with ResNet-50
  143. # N=2, M=28 is optimal for ImageNet with EfficientNet-B7
  144. # for cval they use [125, 122, 113]
  145. # Added in 0.4.0.
  146. def __init__(self, n=2, m=(6, 12), cval=128,
  147. seed=None, name=None,
  148. random_state="deprecated", deterministic="deprecated"):
  149. # pylint: disable=invalid-name
  150. seed = seed if random_state == "deprecated" else random_state
  151. rng = iarandom.RNG(seed)
  152. # we don't limit the value range to 10 here, because the paper
  153. # gives several examples of using more than 10 for M
  154. m = iap.handle_discrete_param(
  155. m, "m", value_range=(0, None),
  156. tuple_to_uniform=True, list_to_choice=True,
  157. allow_floats=False)
  158. self._m = m
  159. self._cval = cval
  160. # The paper says in Appendix A.2.3 "ImageNet", that they actually
  161. # always execute Horizontal Flips and Crops first and only then a
  162. # random selection of the other transformations.
  163. # Hence, we split here into two groups.
  164. # It's not really clear what crop parameters they use, so we
  165. # choose [0..M] here.
  166. initial_augs = self._create_initial_augmenters_list(m)
  167. main_augs = self._create_main_augmenters_list(m, cval)
  168. # assign random state to all child augmenters
  169. for lst in [initial_augs, main_augs]:
  170. for augmenter in lst:
  171. augmenter.random_state = rng
  172. super(RandAugment, self).__init__(
  173. [
  174. meta.Sequential(initial_augs,
  175. seed=rng.derive_rng_()),
  176. meta.SomeOf(n, main_augs, random_order=True,
  177. seed=rng.derive_rng_())
  178. ],
  179. seed=rng, name=name,
  180. random_state=random_state, deterministic=deterministic
  181. )
  182. # Added in 0.4.0.
  183. @classmethod
  184. def _create_initial_augmenters_list(cls, m):
  185. # pylint: disable=invalid-name
  186. return [
  187. flip.Fliplr(0.5),
  188. sizelib.KeepSizeByResize(
  189. # assuming that the paper implementation crops M pixels from
  190. # 224px ImageNet images, we crop here a fraction of
  191. # M*(M_max/224)
  192. sizelib.Crop(
  193. percent=iap.Divide(
  194. iap.Uniform(0, m),
  195. 224,
  196. elementwise=True),
  197. sample_independently=True,
  198. keep_size=False),
  199. interpolation="linear"
  200. )
  201. ]
  202. # Added in 0.4.0.
  203. @classmethod
  204. def _create_main_augmenters_list(cls, m, cval):
  205. # pylint: disable=invalid-name
  206. m_max = cls._M_MAX
  207. def _float_parameter(level, maxval):
  208. maxval_norm = maxval / m_max
  209. return iap.Multiply(level, maxval_norm, elementwise=True)
  210. def _int_parameter(level, maxval):
  211. # paper applies just int(), so we don't round here
  212. return iap.Discretize(_float_parameter(level, maxval),
  213. round=False)
  214. # In the paper's code they use the definition from AutoAugment,
  215. # which is 0.1 + M*1.8/10. But that results in 0.1 for M=0, i.e. for
  216. # Brightness an almost black image, while M=5 would result in an
  217. # unaltered image. For AutoAugment that may be fine, as M is optimized
  218. # for each operation individually, but here we have only one fixed M
  219. # for all operations. Hence, we rather set this to 1.0 +/- M*0.9/10,
  220. # so that M=10 would result in 0.1 or 1.9.
  221. def _enhance_parameter(level):
  222. fparam = _float_parameter(level, 0.9)
  223. return iap.Clip(
  224. iap.Add(1.0, iap.RandomSign(fparam), elementwise=True),
  225. 0.1, 1.9
  226. )
  227. def _subtract(a, b):
  228. return iap.Subtract(a, b, elementwise=True)
  229. def _affine(*args, **kwargs):
  230. kwargs["fillcolor"] = cval
  231. if "center" not in kwargs:
  232. kwargs["center"] = (0.0, 0.0)
  233. return pillike.Affine(*args, **kwargs)
  234. _rnd_s = iap.RandomSign
  235. shear_max = np.rad2deg(0.3)
  236. # we don't add vertical flips here, paper is not really clear about
  237. # whether they used them or not
  238. return [
  239. meta.Identity(),
  240. pillike.Autocontrast(cutoff=0),
  241. pillike.Equalize(),
  242. arithmetic.Invert(p=1.0),
  243. # they use Image.rotate() for the rotation, which uses
  244. # the image center as the rotation center
  245. _affine(rotate=_rnd_s(_float_parameter(m, 30)),
  246. center=(0.5, 0.5)),
  247. # paper uses 4 - int_parameter(M, 4)
  248. pillike.Posterize(
  249. nb_bits=_subtract(
  250. 8,
  251. iap.Clip(_int_parameter(m, 6), 0, 6)
  252. )
  253. ),
  254. # paper uses 256 - int_parameter(M, 256)
  255. pillike.Solarize(
  256. p=1.0,
  257. threshold=iap.Clip(
  258. _subtract(256, _int_parameter(m, 256)),
  259. 0, 256
  260. )
  261. ),
  262. pillike.EnhanceColor(_enhance_parameter(m)),
  263. pillike.EnhanceContrast(_enhance_parameter(m)),
  264. pillike.EnhanceBrightness(_enhance_parameter(m)),
  265. pillike.EnhanceSharpness(_enhance_parameter(m)),
  266. _affine(shear={"x": _rnd_s(_float_parameter(m, shear_max))}),
  267. _affine(shear={"y": _rnd_s(_float_parameter(m, shear_max))}),
  268. _affine(translate_percent={"x": _rnd_s(_float_parameter(m, 0.33))}),
  269. _affine(translate_percent={"y": _rnd_s(_float_parameter(m, 0.33))}),
  270. # paper code uses 20px on CIFAR (i.e. size 20/32), no information
  271. # on ImageNet values so we just use the same values
  272. arithmetic.Cutout(1,
  273. size=iap.Clip(
  274. _float_parameter(m, 20 / 32), 0, 20 / 32),
  275. squared=True,
  276. fill_mode="constant",
  277. cval=cval),
  278. pillike.FilterBlur(),
  279. pillike.FilterSmooth()
  280. ]
  281. # Added in 0.4.0.
  282. def get_parameters(self):
  283. """See :func:`~imgaug.augmenters.meta.Augmenter.get_parameters`."""
  284. someof = self[1]
  285. return [someof.n, self._m, self._cval]