_manipulate.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. import collections.abc
  2. import math
  3. import re
  4. from collections import defaultdict
  5. from itertools import chain
  6. from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, Union
  7. import torch
  8. import torch.utils.checkpoint
  9. from torch import nn as nn
  10. from torch import Tensor
  11. from timm.layers import use_reentrant_ckpt
# Names exported by this module via `from <module> import *`.
__all__ = ['model_parameters', 'named_apply', 'named_modules', 'named_modules_with_params', 'adapt_input_conv',
           'group_with_matcher', 'group_modules', 'group_parameters', 'flatten_modules', 'checkpoint_seq', 'checkpoint']
  14. def model_parameters(model: nn.Module, exclude_head: bool = False):
  15. if exclude_head:
  16. # FIXME this a bit of a quick and dirty hack to skip classifier head params based on ordering
  17. return [p for p in model.parameters()][:-2]
  18. else:
  19. return model.parameters()
  20. def named_apply(
  21. fn: Callable,
  22. module: nn.Module, name='',
  23. depth_first: bool = True,
  24. include_root: bool = False,
  25. ) -> nn.Module:
  26. if not depth_first and include_root:
  27. fn(module=module, name=name)
  28. for child_name, child_module in module.named_children():
  29. child_name = '.'.join((name, child_name)) if name else child_name
  30. named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True)
  31. if depth_first and include_root:
  32. fn(module=module, name=name)
  33. return module
  34. def named_modules(
  35. module: nn.Module,
  36. name: str = '',
  37. depth_first: bool = True,
  38. include_root: bool = False,
  39. ):
  40. if not depth_first and include_root:
  41. yield name, module
  42. for child_name, child_module in module.named_children():
  43. child_name = '.'.join((name, child_name)) if name else child_name
  44. yield from named_modules(
  45. module=child_module, name=child_name, depth_first=depth_first, include_root=True)
  46. if depth_first and include_root:
  47. yield name, module
  48. def named_modules_with_params(
  49. module: nn.Module,
  50. name: str = '',
  51. depth_first: bool = True,
  52. include_root: bool = False,
  53. ):
  54. if module._parameters and not depth_first and include_root:
  55. yield name, module
  56. for child_name, child_module in module.named_children():
  57. child_name = '.'.join((name, child_name)) if name else child_name
  58. yield from named_modules_with_params(
  59. module=child_module, name=child_name, depth_first=depth_first, include_root=True)
  60. if module._parameters and depth_first and include_root:
  61. yield name, module
  62. MATCH_PREV_GROUP = (99999,)
  63. def group_with_matcher(
  64. named_objects: Iterator[Tuple[str, Any]],
  65. group_matcher: Union[Dict, Callable],
  66. return_values: bool = False,
  67. reverse: bool = False
  68. ):
  69. if isinstance(group_matcher, dict):
  70. # dictionary matcher contains a dict of raw-string regex expr that must be compiled
  71. compiled = []
  72. for group_ordinal, (group_name, mspec) in enumerate(group_matcher.items()):
  73. if mspec is None:
  74. continue
  75. # map all matching specifications into 3-tuple (compiled re, prefix, suffix)
  76. if isinstance(mspec, (tuple, list)):
  77. # multi-entry match specifications require each sub-spec to be a 2-tuple (re, suffix)
  78. for sspec in mspec:
  79. compiled += [(re.compile(sspec[0]), (group_ordinal,), sspec[1])]
  80. else:
  81. compiled += [(re.compile(mspec), (group_ordinal,), None)]
  82. group_matcher = compiled
  83. def _get_grouping(name):
  84. if isinstance(group_matcher, (list, tuple)):
  85. for match_fn, prefix, suffix in group_matcher:
  86. r = match_fn.match(name)
  87. if r:
  88. parts = (prefix, r.groups(), suffix)
  89. # map all tuple elem to int for numeric sort, filter out None entries
  90. return tuple(map(float, chain.from_iterable(filter(None, parts))))
  91. return float('inf'), # un-matched layers (neck, head) mapped to largest ordinal
  92. else:
  93. ord = group_matcher(name)
  94. if not isinstance(ord, collections.abc.Iterable):
  95. return ord,
  96. return tuple(ord)
  97. # map layers into groups via ordinals (ints or tuples of ints) from matcher
  98. grouping = defaultdict(list)
  99. for k, v in named_objects:
  100. grouping[_get_grouping(k)].append(v if return_values else k)
  101. # remap to integers
  102. layer_id_to_param = defaultdict(list)
  103. lid = -1
  104. for k in sorted(filter(lambda x: x is not None, grouping.keys())):
  105. if lid < 0 or k[-1] != MATCH_PREV_GROUP[0]:
  106. lid += 1
  107. layer_id_to_param[lid].extend(grouping[k])
  108. if reverse:
  109. assert not return_values, "reverse mapping only sensible for name output"
  110. # output reverse mapping
  111. param_to_layer_id = {}
  112. for lid, lm in layer_id_to_param.items():
  113. for n in lm:
  114. param_to_layer_id[n] = lid
  115. return param_to_layer_id
  116. return layer_id_to_param
  117. def group_parameters(
  118. module: nn.Module,
  119. group_matcher,
  120. return_values: bool = False,
  121. reverse: bool = False,
  122. ):
  123. return group_with_matcher(
  124. module.named_parameters(), group_matcher, return_values=return_values, reverse=reverse)
  125. def group_modules(
  126. module: nn.Module,
  127. group_matcher,
  128. return_values: bool = False,
  129. reverse: bool = False,
  130. ):
  131. return group_with_matcher(
  132. named_modules_with_params(module), group_matcher, return_values=return_values, reverse=reverse)
  133. def flatten_modules(
  134. named_modules: Iterator[Tuple[str, nn.Module]],
  135. depth: int = 1,
  136. prefix: Union[str, Tuple[str, ...]] = '',
  137. module_types: Union[str, Tuple[Type[nn.Module]]] = 'sequential',
  138. ):
  139. prefix_is_tuple = isinstance(prefix, tuple)
  140. if isinstance(module_types, str):
  141. if module_types == 'container':
  142. module_types = (nn.Sequential, nn.ModuleList, nn.ModuleDict)
  143. else:
  144. module_types = (nn.Sequential,)
  145. for name, module in named_modules:
  146. if depth and isinstance(module, module_types):
  147. yield from flatten_modules(
  148. module.named_children(),
  149. depth - 1,
  150. prefix=(name,) if prefix_is_tuple else name,
  151. module_types=module_types,
  152. )
  153. else:
  154. if prefix_is_tuple:
  155. name = prefix + (name,)
  156. yield name, module
  157. else:
  158. if prefix:
  159. name = '.'.join([prefix, name])
  160. yield name, module
  161. def checkpoint(
  162. function,
  163. *args,
  164. use_reentrant: Optional[bool] = None,
  165. **kwargs,
  166. ):
  167. """ checkpoint wrapper fn
  168. A thin wrapper around torch.utils.checkpoint.checkpoint to default
  169. use_reentrant to False
  170. """
  171. if use_reentrant is None:
  172. use_reentrant = use_reentrant_ckpt()
  173. return torch.utils.checkpoint.checkpoint(
  174. function,
  175. *args,
  176. use_reentrant=use_reentrant,
  177. **kwargs,
  178. )
  179. def checkpoint_seq(
  180. functions,
  181. x,
  182. every: int = 1,
  183. flatten: bool = False,
  184. skip_last: bool = False,
  185. use_reentrant: Optional[bool] = None,
  186. ):
  187. r"""A helper function for checkpointing sequential models.
  188. Sequential models execute a list of modules/functions in order
  189. (sequentially). Therefore, we can divide such a sequence into segments
  190. and checkpoint each segment. All segments except run in :func:`torch.no_grad`
  191. manner, i.e., not storing the intermediate activations. The inputs of each
  192. checkpointed segment will be saved for re-running the segment in the backward pass.
  193. See :func:`~torch.utils.checkpoint.checkpoint` on how checkpointing works.
  194. .. warning::
  195. Checkpointing currently only supports :func:`torch.autograd.backward`
  196. and only if its `inputs` argument is not passed. :func:`torch.autograd.grad`
  197. is not supported.
  198. .. warning:
  199. At least one of the inputs needs to have :code:`requires_grad=True` if
  200. grads are needed for model inputs, otherwise the checkpointed part of the
  201. model won't have gradients.
  202. Args:
  203. functions: A :class:`torch.nn.Sequential` or the list of modules or functions to run sequentially.
  204. x: A Tensor that is input to :attr:`functions`
  205. every: checkpoint every-n functions (default: 1)
  206. flatten: flatten nn.Sequential of nn.Sequentials
  207. skip_last: skip checkpointing the last function in the sequence if True
  208. use_reentrant: Use re-entrant checkpointing
  209. Returns:
  210. Output of running :attr:`functions` sequentially on :attr:`*inputs`
  211. Example:
  212. >>> model = nn.Sequential(...)
  213. >>> input_var = checkpoint_seq(model, input_var, every=2)
  214. """
  215. if use_reentrant is None:
  216. use_reentrant = use_reentrant_ckpt()
  217. def run_function(start, end, functions):
  218. def forward(_x):
  219. for j in range(start, end + 1):
  220. _x = functions[j](_x)
  221. return _x
  222. return forward
  223. if isinstance(functions, torch.nn.Sequential):
  224. functions = functions.children()
  225. if flatten:
  226. functions = chain.from_iterable(functions)
  227. if not isinstance(functions, (tuple, list)):
  228. functions = tuple(functions)
  229. num_checkpointed = len(functions)
  230. if skip_last:
  231. num_checkpointed -= 1
  232. end = -1
  233. for start in range(0, num_checkpointed, every):
  234. end = min(start + every - 1, num_checkpointed - 1)
  235. x = torch.utils.checkpoint.checkpoint(
  236. run_function(start, end, functions),
  237. x,
  238. use_reentrant=use_reentrant,
  239. )
  240. if skip_last:
  241. return run_function(end + 1, len(functions) - 1, functions)(x)
  242. return x
  243. def adapt_input_conv(in_chans: int, conv_weight: Tensor) -> Tensor:
  244. conv_type = conv_weight.dtype
  245. conv_weight = conv_weight.float() # Some weights are in torch.half, ensure it's float for sum on CPU
  246. O, I, J, K = conv_weight.shape
  247. if in_chans == 1:
  248. if I > 3:
  249. assert conv_weight.shape[1] % 3 == 0
  250. # For models with space2depth stems
  251. conv_weight = conv_weight.reshape(O, I // 3, 3, J, K)
  252. conv_weight = conv_weight.sum(dim=2, keepdim=False)
  253. else:
  254. conv_weight = conv_weight.sum(dim=1, keepdim=True)
  255. elif in_chans != 3:
  256. if I != 3:
  257. raise NotImplementedError('Weight format not supported by conversion.')
  258. else:
  259. # NOTE this strategy should be better than random init, but there could be other combinations of
  260. # the original RGB input layer weights that'd work better for specific cases.
  261. repeat = int(math.ceil(in_chans / 3))
  262. conv_weight = conv_weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :]
  263. conv_weight *= (3 / float(in_chans))
  264. conv_weight = conv_weight.to(conv_type)
  265. return conv_weight