byobnet.py 112 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138
  1. """ Bring-Your-Own-Blocks Network
  2. A flexible network w/ dataclass based config for stacking those NN blocks.
  3. This model is currently used to implement the following networks:
  4. GPU Efficient (ResNets) - gernet_l/m/s (original versions called genet, but this was already used (by SENet author)).
  5. Paper: `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
  6. Code and weights: https://github.com/idstcv/GPU-Efficient-Networks, licensed Apache 2.0
  7. RepVGG - repvgg_*
  8. Paper: `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  9. Code and weights: https://github.com/DingXiaoH/RepVGG, licensed MIT
  10. MobileOne - mobileone_*
  11. Paper: `MobileOne: An Improved One millisecond Mobile Backbone` - https://arxiv.org/abs/2206.04040
  12. Code and weights: https://github.com/apple/ml-mobileone, licensed MIT
  13. In all cases the models have been modified to fit within the design of ByobNet. I've remapped
  14. the original weights and verified accuracies.
  15. For GPU Efficient nets, I used the original names for the blocks since they were for the most part
  16. the same as original residual blocks in ResNe(X)t, DarkNet, and other existing models. Note also some
  17. changes introduced in RegNet were also present in the stem and bottleneck blocks for this model.
  18. A significant number of different network archs can be implemented here, including variants of the
  19. above nets that include attention.
  20. Hacked together by / copyright Ross Wightman, 2021.
  21. """
  22. import math
  23. from dataclasses import dataclass, field, replace
  24. from functools import partial
  25. from typing import Tuple, List, Dict, Optional, Union, Any, Callable, Sequence, Type
  26. import torch
  27. import torch.nn as nn
  28. from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
  29. from timm.layers import (
  30. ClassifierHead,
  31. NormMlpClassifierHead,
  32. ConvNormAct,
  33. BatchNormAct2d,
  34. EvoNorm2dS0a,
  35. AttentionPool2d,
  36. RotAttentionPool2d,
  37. DropPath,
  38. calculate_drop_path_rates,
  39. AvgPool2dSame,
  40. create_conv2d,
  41. get_act_layer,
  42. get_norm_act_layer,
  43. get_attn,
  44. make_divisible,
  45. to_2tuple,
  46. )
  47. from ._builder import build_model_with_cfg
  48. from ._features import feature_take_indices
  49. from ._manipulate import named_apply, checkpoint_seq
  50. from ._registry import generate_default_cfgs, register_model
  51. __all__ = ['ByobNet', 'ByoModelCfg', 'ByoBlockCfg', 'create_byob_stem', 'create_block']
  52. @dataclass
  53. class ByoBlockCfg:
  54. """Block configuration for Bring-Your-Own-Blocks.
  55. Defines configuration for a single block or stage of blocks.
  56. """
  57. type: Union[str, nn.Module]
  58. d: int # block depth (number of block repeats in stage)
  59. c: int # number of output channels for each block in stage
  60. s: int = 2 # stride of stage (first block)
  61. gs: Optional[Union[int, Callable]] = None # group-size of blocks in stage, conv is depthwise if gs == 1
  62. br: float = 1. # bottleneck-ratio of blocks in stage
  63. # NOTE: these config items override the model cfgs that are applied to all blocks by default
  64. attn_layer: Optional[str] = None
  65. attn_kwargs: Optional[Dict[str, Any]] = None
  66. self_attn_layer: Optional[str] = None
  67. self_attn_kwargs: Optional[Dict[str, Any]] = None
  68. block_kwargs: Optional[Dict[str, Any]] = None
  69. @dataclass
  70. class ByoModelCfg:
  71. """Model configuration for Bring-Your-Own-Blocks network.
  72. Defines overall architecture configuration.
  73. """
  74. blocks: Tuple[Union[ByoBlockCfg, Tuple[ByoBlockCfg, ...]], ...]
  75. downsample: str = 'conv1x1'
  76. stem_type: str = '3x3'
  77. stem_pool: Optional[str] = 'maxpool'
  78. stem_chs: Union[int, List[int], Tuple[int, ...]] = 32
  79. width_factor: float = 1.0
  80. num_features: int = 0 # num out_channels for final conv, no final 1x1 conv if 0
  81. zero_init_last: bool = True # zero init last weight (usually bn) in residual path
  82. fixed_input_size: bool = False # model constrained to a fixed-input size / img_size must be provided on creation
  83. # layer config
  84. act_layer: str = 'relu'
  85. norm_layer: str = 'batchnorm'
  86. aa_layer: str = ''
  87. # Head config
  88. head_hidden_size: Optional[int] = None # feat dim of MLP head or AttentionPool output
  89. head_type: str = 'classifier'
  90. # Block config
  91. # NOTE: these config items will be overridden by the block cfg (per-block) if they are set there
  92. attn_layer: Optional[str] = None
  93. attn_kwargs: dict = field(default_factory=lambda: dict())
  94. self_attn_layer: Optional[str] = None
  95. self_attn_kwargs: dict = field(default_factory=lambda: dict())
  96. block_kwargs: Dict[str, Any] = field(default_factory=lambda: dict())
  97. def _rep_vgg_bcfg(d: Tuple[int, ...] = (4, 6, 16, 1), wf: Tuple[float, ...] = (1., 1., 1., 1.), groups: int = 0) -> \
  98. Tuple[ByoBlockCfg, ...]:
  99. """Create RepVGG block configuration.
  100. Args:
  101. d: Depth (number of blocks) per stage.
  102. wf: Width factor per stage.
  103. groups: Number of groups for grouped convolution.
  104. Returns:
  105. Tuple of block configurations.
  106. """
  107. c = (64, 128, 256, 512)
  108. group_size = 0
  109. if groups > 0:
  110. group_size = lambda chs, idx: chs // groups if (idx + 1) % 2 == 0 else 0
  111. bcfg = tuple([ByoBlockCfg(type='rep', d=d, c=c * wf, gs=group_size) for d, c, wf in zip(d, c, wf)])
  112. return bcfg
  113. def _mobileone_bcfg(
  114. d: Tuple[int, ...] = (2, 8, 10, 1),
  115. wf: Tuple[float, ...] = (1., 1., 1., 1.),
  116. se_blocks: Tuple[int, ...] = (),
  117. num_conv_branches: int = 1
  118. ) -> List[List[ByoBlockCfg]]:
  119. """Create MobileOne block configuration.
  120. Args:
  121. d: Depth (number of blocks) per stage.
  122. wf: Width factor per stage.
  123. se_blocks: Number of SE blocks per stage.
  124. num_conv_branches: Number of conv branches.
  125. Returns:
  126. List of block configurations per stage.
  127. """
  128. c = (64, 128, 256, 512)
  129. prev_c = min(64, c[0] * wf[0])
  130. se_blocks = se_blocks or (0,) * len(d)
  131. bcfg = []
  132. for d, c, w, se in zip(d, c, wf, se_blocks):
  133. scfg = []
  134. for i in range(d):
  135. out_c = c * w
  136. bk = dict(num_conv_branches=num_conv_branches)
  137. ak = {}
  138. if i >= d - se:
  139. ak['attn_layer'] = 'se'
  140. scfg += [ByoBlockCfg(type='one', d=1, c=prev_c, gs=1, block_kwargs=bk, **ak)] # depthwise block
  141. scfg += [ByoBlockCfg(
  142. type='one', d=1, c=out_c, gs=0, block_kwargs=dict(kernel_size=1, **bk), **ak)] # pointwise block
  143. prev_c = out_c
  144. bcfg += [scfg]
  145. return bcfg
  146. def interleave_blocks(
  147. types: Tuple[str, str],
  148. d: int,
  149. every: Union[int, List[int]] = 1,
  150. first: bool = False,
  151. **kwargs,
  152. ) -> Tuple[ByoBlockCfg, ...]:
  153. """Interleave 2 block types in stack.
  154. Args:
  155. types: Two block type names to interleave.
  156. d: Total depth of blocks.
  157. every: Interval for alternating blocks.
  158. first: Whether to start with alternate block.
  159. **kwargs: Additional block arguments.
  160. Returns:
  161. Tuple of interleaved block configurations.
  162. """
  163. assert len(types) == 2
  164. if isinstance(every, int):
  165. every = list(range(0 if first else every, d, every + 1))
  166. if not every:
  167. every = [d - 1]
  168. set(every)
  169. blocks = []
  170. for i in range(d):
  171. block_type = types[1] if i in every else types[0]
  172. blocks += [ByoBlockCfg(type=block_type, d=1, **kwargs)]
  173. return tuple(blocks)
  174. def expand_blocks_cfg(stage_blocks_cfg: Union[ByoBlockCfg, Sequence[ByoBlockCfg]]) -> List[ByoBlockCfg]:
  175. """Expand block config into individual block instances.
  176. Args:
  177. stage_blocks_cfg: Block configuration(s) for a stage.
  178. Returns:
  179. List of individual block configurations.
  180. """
  181. if not isinstance(stage_blocks_cfg, Sequence):
  182. stage_blocks_cfg = (stage_blocks_cfg,)
  183. block_cfgs = []
  184. for i, cfg in enumerate(stage_blocks_cfg):
  185. block_cfgs += [replace(cfg, d=1) for _ in range(cfg.d)]
  186. return block_cfgs
  187. def num_groups(group_size: Optional[int], channels: int) -> int:
  188. """Calculate number of groups for grouped convolution.
  189. Args:
  190. group_size: Size of each group (1 for depthwise).
  191. channels: Number of channels.
  192. Returns:
  193. Number of groups.
  194. """
  195. if not group_size: # 0 or None
  196. return 1 # normal conv with 1 group
  197. else:
  198. # NOTE group_size == 1 -> depthwise conv
  199. assert channels % group_size == 0
  200. return channels // group_size
  201. @dataclass
  202. class LayerFn:
  203. """Container for layer factory functions."""
  204. conv_norm_act: Type[nn.Module] = ConvNormAct
  205. norm_act: Type[nn.Module] = BatchNormAct2d
  206. act: Type[nn.Module] = nn.ReLU
  207. attn: Optional[Type[nn.Module]] = None
  208. self_attn: Optional[Type[nn.Module]] = None
  209. class DownsampleAvg(nn.Module):
  210. """Average pool downsampling module.
  211. AvgPool Downsampling as in 'D' ResNet variants.
  212. """
  213. def __init__(
  214. self,
  215. in_chs: int,
  216. out_chs: int,
  217. stride: int = 1,
  218. dilation: int = 1,
  219. apply_act: bool = False,
  220. layers: Optional[LayerFn] = None,
  221. device=None,
  222. dtype=None,
  223. ):
  224. """Initialize DownsampleAvg.
  225. Args:
  226. in_chs: Number of input channels.
  227. out_chs: Number of output channels.
  228. stride: Stride for downsampling.
  229. dilation: Dilation rate.
  230. apply_act: Whether to apply activation.
  231. layers: Layer factory functions.
  232. """
  233. dd = {'device': device, 'dtype': dtype}
  234. super().__init__()
  235. layers = layers or LayerFn()
  236. avg_stride = stride if dilation == 1 else 1
  237. if stride > 1 or dilation > 1:
  238. avg_pool_fn = AvgPool2dSame if avg_stride == 1 and dilation > 1 else nn.AvgPool2d
  239. self.pool = avg_pool_fn(2, avg_stride, ceil_mode=True, count_include_pad=False)
  240. else:
  241. self.pool = nn.Identity()
  242. self.conv = layers.conv_norm_act(in_chs, out_chs, 1, apply_act=apply_act, **dd)
  243. def forward(self, x: torch.Tensor) -> torch.Tensor:
  244. """Forward pass.
  245. Args:
  246. x: Input tensor.
  247. Returns:
  248. Output tensor.
  249. """
  250. return self.conv(self.pool(x))
  251. def create_shortcut(
  252. downsample_type: str,
  253. in_chs: int,
  254. out_chs: int,
  255. stride: int,
  256. dilation: Tuple[int, int],
  257. layers: LayerFn,
  258. **kwargs,
  259. ) -> Optional[nn.Module]:
  260. """Create shortcut connection for residual blocks.
  261. Args:
  262. downsample_type: Type of downsampling ('avg', 'conv1x1', or '').
  263. in_chs: Input channels.
  264. out_chs: Output channels.
  265. stride: Stride for downsampling.
  266. dilation: Dilation rates.
  267. layers: Layer factory functions.
  268. **kwargs: Additional arguments.
  269. Returns:
  270. Shortcut module or None.
  271. """
  272. assert downsample_type in ('avg', 'conv1x1', '')
  273. if in_chs != out_chs or stride != 1 or dilation[0] != dilation[1]:
  274. if not downsample_type:
  275. return None # no shortcut
  276. elif downsample_type == 'avg':
  277. return DownsampleAvg(in_chs, out_chs, stride=stride, dilation=dilation[0], **kwargs)
  278. else:
  279. return layers.conv_norm_act(in_chs, out_chs, kernel_size=1, stride=stride, dilation=dilation[0], **kwargs)
  280. else:
  281. return nn.Identity() # identity shortcut
  282. class BasicBlock(nn.Module):
  283. """ ResNet Basic Block - kxk + kxk
  284. """
  285. def __init__(
  286. self,
  287. in_chs: int,
  288. out_chs: int,
  289. kernel_size: int = 3,
  290. stride: int = 1,
  291. dilation: Tuple[int, int] = (1, 1),
  292. group_size: Optional[int] = None,
  293. bottle_ratio: float = 1.0,
  294. downsample: str = 'avg',
  295. attn_last: bool = True,
  296. linear_out: bool = False,
  297. layers: LayerFn = None,
  298. drop_block: Callable = None,
  299. drop_path_rate: float = 0.,
  300. device=None,
  301. dtype=None,
  302. ):
  303. dd = {'device': device, 'dtype': dtype}
  304. super().__init__()
  305. layers = layers or LayerFn()
  306. mid_chs = make_divisible(out_chs * bottle_ratio)
  307. groups = num_groups(group_size, mid_chs)
  308. self.shortcut = create_shortcut(
  309. downsample,
  310. in_chs,
  311. out_chs,
  312. stride=stride,
  313. dilation=dilation,
  314. apply_act=False,
  315. layers=layers,
  316. **dd,
  317. )
  318. self.conv1_kxk = layers.conv_norm_act(in_chs, mid_chs, kernel_size, stride=stride, dilation=dilation[0], **dd)
  319. self.attn = nn.Identity() if attn_last or layers.attn is None else layers.attn(mid_chs)
  320. self.conv2_kxk = layers.conv_norm_act(
  321. mid_chs,
  322. out_chs,
  323. kernel_size,
  324. dilation=dilation[1],
  325. groups=groups,
  326. drop_layer=drop_block,
  327. apply_act=False,
  328. **dd,
  329. )
  330. self.attn_last = nn.Identity() if not attn_last or layers.attn is None else layers.attn(out_chs, **dd)
  331. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  332. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  333. def init_weights(self, zero_init_last: bool = False):
  334. if zero_init_last and self.shortcut is not None and getattr(self.conv2_kxk.bn, 'weight', None) is not None:
  335. nn.init.zeros_(self.conv2_kxk.bn.weight)
  336. for attn in (self.attn, self.attn_last):
  337. if hasattr(attn, 'reset_parameters'):
  338. attn.reset_parameters()
  339. def forward(self, x):
  340. shortcut = x
  341. x = self.conv1_kxk(x)
  342. x = self.attn(x)
  343. x = self.conv2_kxk(x)
  344. x = self.attn_last(x)
  345. x = self.drop_path(x)
  346. if self.shortcut is not None:
  347. x = x + self.shortcut(shortcut)
  348. return self.act(x)
  349. class BottleneckBlock(nn.Module):
  350. """ ResNet-like Bottleneck Block - 1x1 - kxk - 1x1
  351. """
  352. def __init__(
  353. self,
  354. in_chs: int,
  355. out_chs: int,
  356. kernel_size: int = 3,
  357. stride: int = 1,
  358. dilation: Tuple[int, int] = (1, 1),
  359. bottle_ratio: float = 1.,
  360. group_size: Optional[int] = None,
  361. downsample: str = 'avg',
  362. attn_last: bool = False,
  363. linear_out: bool = False,
  364. extra_conv: bool = False,
  365. bottle_in: bool = False,
  366. layers: LayerFn = None,
  367. drop_block: Callable = None,
  368. drop_path_rate: float = 0.,
  369. device=None,
  370. dtype=None,
  371. ):
  372. dd = {'device': device, 'dtype': dtype}
  373. super().__init__()
  374. layers = layers or LayerFn()
  375. mid_chs = make_divisible((in_chs if bottle_in else out_chs) * bottle_ratio)
  376. groups = num_groups(group_size, mid_chs)
  377. self.shortcut = create_shortcut(
  378. downsample,
  379. in_chs,
  380. out_chs,
  381. stride=stride,
  382. dilation=dilation,
  383. apply_act=False,
  384. layers=layers,
  385. **dd,
  386. )
  387. self.conv1_1x1 = layers.conv_norm_act(in_chs, mid_chs, 1, **dd)
  388. self.conv2_kxk = layers.conv_norm_act(
  389. mid_chs,
  390. mid_chs,
  391. kernel_size,
  392. stride=stride,
  393. dilation=dilation[0],
  394. groups=groups,
  395. drop_layer=drop_block,
  396. **dd,
  397. )
  398. if extra_conv:
  399. self.conv2b_kxk = layers.conv_norm_act(
  400. mid_chs,
  401. mid_chs,
  402. kernel_size,
  403. dilation=dilation[1],
  404. groups=groups,
  405. **dd,
  406. )
  407. else:
  408. self.conv2b_kxk = nn.Identity()
  409. self.attn = nn.Identity() if attn_last or layers.attn is None else layers.attn(mid_chs, **dd)
  410. self.conv3_1x1 = layers.conv_norm_act(mid_chs, out_chs, 1, apply_act=False, **dd)
  411. self.attn_last = nn.Identity() if not attn_last or layers.attn is None else layers.attn(out_chs, **dd)
  412. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  413. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  414. def init_weights(self, zero_init_last: bool = False):
  415. if zero_init_last and self.shortcut is not None and getattr(self.conv3_1x1.bn, 'weight', None) is not None:
  416. nn.init.zeros_(self.conv3_1x1.bn.weight)
  417. for attn in (self.attn, self.attn_last):
  418. if hasattr(attn, 'reset_parameters'):
  419. attn.reset_parameters()
  420. def forward(self, x):
  421. shortcut = x
  422. x = self.conv1_1x1(x)
  423. x = self.conv2_kxk(x)
  424. x = self.conv2b_kxk(x)
  425. x = self.attn(x)
  426. x = self.conv3_1x1(x)
  427. x = self.attn_last(x)
  428. x = self.drop_path(x)
  429. if self.shortcut is not None:
  430. x = x + self.shortcut(shortcut)
  431. return self.act(x)
  432. class DarkBlock(nn.Module):
  433. """ DarkNet-like (1x1 + 3x3 w/ stride) block
  434. The GE-Net impl included a 1x1 + 3x3 block in their search space. It was not used in the feature models.
  435. This block is pretty much a DarkNet block (also DenseNet) hence the name. Neither DarkNet or DenseNet
  436. uses strides within the block (external 3x3 or maxpool downsampling is done in front of the block repeats).
  437. If one does want to use a lot of these blocks w/ stride, I'd recommend using the EdgeBlock (3x3 /w stride + 1x1)
  438. for more optimal compute.
  439. """
  440. def __init__(
  441. self,
  442. in_chs: int,
  443. out_chs: int,
  444. kernel_size: int = 3,
  445. stride: int = 1,
  446. dilation: Tuple[int, int] = (1, 1),
  447. bottle_ratio: float = 1.0,
  448. group_size: Optional[int] = None,
  449. downsample: str = 'avg',
  450. attn_last: bool = True,
  451. linear_out: bool = False,
  452. layers: LayerFn = None,
  453. drop_block: Callable = None,
  454. drop_path_rate: float = 0.,
  455. device=None,
  456. dtype=None,
  457. ):
  458. dd = {'device': device, 'dtype': dtype}
  459. super().__init__()
  460. layers = layers or LayerFn()
  461. mid_chs = make_divisible(out_chs * bottle_ratio)
  462. groups = num_groups(group_size, mid_chs)
  463. self.shortcut = create_shortcut(
  464. downsample,
  465. in_chs,
  466. out_chs,
  467. stride=stride,
  468. dilation=dilation,
  469. apply_act=False,
  470. layers=layers,
  471. **dd,
  472. )
  473. self.conv1_1x1 = layers.conv_norm_act(in_chs, mid_chs, 1, **dd)
  474. self.attn = nn.Identity() if attn_last or layers.attn is None else layers.attn(mid_chs, **dd)
  475. self.conv2_kxk = layers.conv_norm_act(
  476. mid_chs,
  477. out_chs,
  478. kernel_size,
  479. stride=stride,
  480. dilation=dilation[0],
  481. groups=groups,
  482. drop_layer=drop_block,
  483. apply_act=False,
  484. **dd,
  485. )
  486. self.attn_last = nn.Identity() if not attn_last or layers.attn is None else layers.attn(out_chs, **dd)
  487. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  488. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  489. def init_weights(self, zero_init_last: bool = False):
  490. if zero_init_last and self.shortcut is not None and getattr(self.conv2_kxk.bn, 'weight', None) is not None:
  491. nn.init.zeros_(self.conv2_kxk.bn.weight)
  492. for attn in (self.attn, self.attn_last):
  493. if hasattr(attn, 'reset_parameters'):
  494. attn.reset_parameters()
  495. def forward(self, x):
  496. shortcut = x
  497. x = self.conv1_1x1(x)
  498. x = self.attn(x)
  499. x = self.conv2_kxk(x)
  500. x = self.attn_last(x)
  501. x = self.drop_path(x)
  502. if self.shortcut is not None:
  503. x = x + self.shortcut(shortcut)
  504. return self.act(x)
  505. class EdgeBlock(nn.Module):
  506. """ EdgeResidual-like (3x3 + 1x1) block
  507. A two layer block like DarkBlock, but with the order of the 3x3 and 1x1 convs reversed.
  508. Very similar to the EfficientNet Edge-Residual block but this block it ends with activations, is
  509. intended to be used with either expansion or bottleneck contraction, and can use DW/group/non-grouped convs.
  510. FIXME is there a more common 3x3 + 1x1 conv block to name this after?
  511. """
  512. def __init__(
  513. self,
  514. in_chs: int,
  515. out_chs: int,
  516. kernel_size: int = 3,
  517. stride: int = 1,
  518. dilation: Tuple[int, int] = (1, 1),
  519. bottle_ratio: float = 1.0,
  520. group_size: Optional[int] = None,
  521. downsample: str = 'avg',
  522. attn_last: bool = False,
  523. linear_out: bool = False,
  524. layers: LayerFn = None,
  525. drop_block: Callable = None,
  526. drop_path_rate: float = 0.,
  527. device=None,
  528. dtype=None,
  529. ):
  530. dd = {'device': device, 'dtype': dtype}
  531. super().__init__()
  532. layers = layers or LayerFn()
  533. mid_chs = make_divisible(out_chs * bottle_ratio)
  534. groups = num_groups(group_size, mid_chs)
  535. self.shortcut = create_shortcut(
  536. downsample,
  537. in_chs,
  538. out_chs,
  539. stride=stride,
  540. dilation=dilation,
  541. apply_act=False,
  542. layers=layers,
  543. **dd,
  544. )
  545. self.conv1_kxk = layers.conv_norm_act(
  546. in_chs,
  547. mid_chs,
  548. kernel_size,
  549. stride=stride,
  550. dilation=dilation[0],
  551. groups=groups,
  552. drop_layer=drop_block,
  553. **dd,
  554. )
  555. self.attn = nn.Identity() if attn_last or layers.attn is None else layers.attn(mid_chs, **dd)
  556. self.conv2_1x1 = layers.conv_norm_act(mid_chs, out_chs, 1, apply_act=False, **dd)
  557. self.attn_last = nn.Identity() if not attn_last or layers.attn is None else layers.attn(out_chs, **dd)
  558. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  559. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  560. def init_weights(self, zero_init_last: bool = False):
  561. if zero_init_last and self.shortcut is not None and getattr(self.conv2_1x1.bn, 'weight', None) is not None:
  562. nn.init.zeros_(self.conv2_1x1.bn.weight)
  563. for attn in (self.attn, self.attn_last):
  564. if hasattr(attn, 'reset_parameters'):
  565. attn.reset_parameters()
  566. def forward(self, x):
  567. shortcut = x
  568. x = self.conv1_kxk(x)
  569. x = self.attn(x)
  570. x = self.conv2_1x1(x)
  571. x = self.attn_last(x)
  572. x = self.drop_path(x)
  573. if self.shortcut is not None:
  574. x = x + self.shortcut(shortcut)
  575. return self.act(x)
  576. class RepVggBlock(nn.Module):
  577. """ RepVGG Block.
  578. Adapted from impl at https://github.com/DingXiaoH/RepVGG
  579. """
  580. def __init__(
  581. self,
  582. in_chs: int,
  583. out_chs: int,
  584. kernel_size: int = 3,
  585. stride: int = 1,
  586. dilation: Tuple[int, int] = (1, 1),
  587. bottle_ratio: float = 1.0,
  588. group_size: Optional[int] = None,
  589. downsample: str = '',
  590. layers: LayerFn = None,
  591. drop_block: Callable = None,
  592. drop_path_rate: float = 0.,
  593. inference_mode: bool = False,
  594. device=None,
  595. dtype=None,
  596. ):
  597. dd = {'device': device, 'dtype': dtype}
  598. super().__init__()
  599. self.groups = groups = num_groups(group_size, in_chs)
  600. layers = layers or LayerFn()
  601. if inference_mode:
  602. self.reparam_conv = nn.Conv2d(
  603. in_channels=in_chs,
  604. out_channels=out_chs,
  605. kernel_size=kernel_size,
  606. stride=stride,
  607. dilation=dilation,
  608. groups=groups,
  609. bias=True,
  610. **dd,
  611. )
  612. else:
  613. self.reparam_conv = None
  614. use_ident = in_chs == out_chs and stride == 1 and dilation[0] == dilation[1]
  615. self.identity = layers.norm_act(out_chs, apply_act=False, **dd) if use_ident else None
  616. self.conv_kxk = layers.conv_norm_act(
  617. in_chs,
  618. out_chs,
  619. kernel_size,
  620. stride=stride,
  621. dilation=dilation[0],
  622. groups=groups,
  623. drop_layer=drop_block,
  624. apply_act=False,
  625. **dd,
  626. )
  627. self.conv_1x1 = layers.conv_norm_act(
  628. in_chs,
  629. out_chs,
  630. 1,
  631. stride=stride,
  632. groups=groups,
  633. apply_act=False,
  634. **dd,
  635. )
  636. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. and use_ident else nn.Identity()
  637. self.attn = nn.Identity() if layers.attn is None else layers.attn(out_chs, **dd)
  638. self.act = layers.act(inplace=True)
  639. def init_weights(self, zero_init_last: bool = False):
  640. # NOTE this init overrides that base model init with specific changes for the block type
  641. for m in self.modules():
  642. if isinstance(m, nn.BatchNorm2d):
  643. nn.init.normal_(m.weight, .1, .1)
  644. nn.init.normal_(m.bias, 0, .1)
  645. if hasattr(self.attn, 'reset_parameters'):
  646. self.attn.reset_parameters()
  647. def forward(self, x):
  648. if self.reparam_conv is not None:
  649. return self.act(self.attn(self.reparam_conv(x)))
  650. if self.identity is None:
  651. x = self.conv_1x1(x) + self.conv_kxk(x)
  652. else:
  653. identity = self.identity(x)
  654. x = self.conv_1x1(x) + self.conv_kxk(x)
  655. x = self.drop_path(x) # not in the paper / official impl, experimental
  656. x += identity
  657. x = self.attn(x) # no attn in the paper / official impl, experimental
  658. return self.act(x)
  659. def reparameterize(self):
  660. """ Following works like `RepVGG: Making VGG-style ConvNets Great Again` -
  661. https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
  662. architecture used at training time to obtain a plain CNN-like structure
  663. for inference.
  664. """
  665. if self.reparam_conv is not None:
  666. return
  667. kernel, bias = self._get_kernel_bias()
  668. self.reparam_conv = nn.Conv2d(
  669. in_channels=self.conv_kxk.conv.in_channels,
  670. out_channels=self.conv_kxk.conv.out_channels,
  671. kernel_size=self.conv_kxk.conv.kernel_size,
  672. stride=self.conv_kxk.conv.stride,
  673. padding=self.conv_kxk.conv.padding,
  674. dilation=self.conv_kxk.conv.dilation,
  675. groups=self.conv_kxk.conv.groups,
  676. bias=True,
  677. )
  678. self.reparam_conv.weight.data = kernel
  679. self.reparam_conv.bias.data = bias
  680. # Delete un-used branches
  681. for name, para in self.named_parameters():
  682. if 'reparam_conv' in name:
  683. continue
  684. para.detach_()
  685. self.__delattr__('conv_kxk')
  686. self.__delattr__('conv_1x1')
  687. self.__delattr__('identity')
  688. self.__delattr__('drop_path')
  689. def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]:
  690. """ Method to obtain re-parameterized kernel and bias.
  691. Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83
  692. """
  693. # get weights and bias of scale branch
  694. kernel_1x1 = 0
  695. bias_1x1 = 0
  696. if self.conv_1x1 is not None:
  697. kernel_1x1, bias_1x1 = self._fuse_bn_tensor(self.conv_1x1)
  698. # Pad scale branch kernel to match conv branch kernel size.
  699. pad = self.conv_kxk.conv.kernel_size[0] // 2
  700. kernel_1x1 = torch.nn.functional.pad(kernel_1x1, [pad, pad, pad, pad])
  701. # get weights and bias of skip branch
  702. kernel_identity = 0
  703. bias_identity = 0
  704. if self.identity is not None:
  705. kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
  706. # get weights and bias of conv branches
  707. kernel_conv, bias_conv = self._fuse_bn_tensor(self.conv_kxk)
  708. kernel_final = kernel_conv + kernel_1x1 + kernel_identity
  709. bias_final = bias_conv + bias_1x1 + bias_identity
  710. return kernel_final, bias_final
  711. def _fuse_bn_tensor(self, branch) -> Tuple[torch.Tensor, torch.Tensor]:
  712. """ Method to fuse batchnorm layer with preceding conv layer.
  713. Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95
  714. """
  715. if isinstance(branch, ConvNormAct):
  716. kernel = branch.conv.weight
  717. running_mean = branch.bn.running_mean
  718. running_var = branch.bn.running_var
  719. gamma = branch.bn.weight
  720. beta = branch.bn.bias
  721. eps = branch.bn.eps
  722. else:
  723. assert isinstance(branch, nn.BatchNorm2d)
  724. if not hasattr(self, 'id_tensor'):
  725. in_chs = self.conv_kxk.conv.in_channels
  726. input_dim = in_chs // self.groups
  727. kernel_size = self.conv_kxk.conv.kernel_size
  728. kernel_value = torch.zeros_like(self.conv_kxk.conv.weight)
  729. for i in range(in_chs):
  730. kernel_value[i, i % input_dim, kernel_size[0] // 2, kernel_size[1] // 2] = 1
  731. self.id_tensor = kernel_value
  732. kernel = self.id_tensor
  733. running_mean = branch.running_mean
  734. running_var = branch.running_var
  735. gamma = branch.weight
  736. beta = branch.bias
  737. eps = branch.eps
  738. std = (running_var + eps).sqrt()
  739. t = (gamma / std).reshape(-1, 1, 1, 1)
  740. return kernel * t, beta - running_mean * gamma / std
  741. class MobileOneBlock(nn.Module):
  742. """ MobileOne building block.
  743. This block has a multi-branched architecture at train-time
  744. and plain-CNN style architecture at inference time
  745. For more details, please refer to our paper:
  746. `An Improved One millisecond Mobile Backbone` -
  747. https://arxiv.org/pdf/2206.04040.pdf
  748. """
  749. def __init__(
  750. self,
  751. in_chs: int,
  752. out_chs: int,
  753. kernel_size: int = 3,
  754. stride: int = 1,
  755. dilation: Tuple[int, int] = (1, 1),
  756. bottle_ratio: float = 1.0, # unused
  757. group_size: Optional[int] = None,
  758. downsample: str = '', # unused
  759. inference_mode: bool = False,
  760. num_conv_branches: int = 1,
  761. layers: LayerFn = None,
  762. drop_block: Callable = None,
  763. drop_path_rate: float = 0.,
  764. device=None,
  765. dtype=None,
  766. ) -> None:
  767. """ Construct a MobileOneBlock module.
  768. """
  769. dd = {'device': device, 'dtype': dtype}
  770. super().__init__()
  771. self.num_conv_branches = num_conv_branches
  772. self.groups = groups = num_groups(group_size, in_chs)
  773. layers = layers or LayerFn()
  774. if inference_mode:
  775. self.reparam_conv = nn.Conv2d(
  776. in_channels=in_chs,
  777. out_channels=out_chs,
  778. kernel_size=kernel_size,
  779. stride=stride,
  780. dilation=dilation,
  781. groups=groups,
  782. bias=True,
  783. **dd,
  784. )
  785. else:
  786. self.reparam_conv = None
  787. # Re-parameterizable skip connection
  788. use_ident = in_chs == out_chs and stride == 1 and dilation[0] == dilation[1]
  789. self.identity = layers.norm_act(out_chs, apply_act=False, **dd) if use_ident else None
  790. # Re-parameterizable conv branches
  791. convs = []
  792. for _ in range(self.num_conv_branches):
  793. convs.append(layers.conv_norm_act(
  794. in_chs,
  795. out_chs,
  796. kernel_size=kernel_size,
  797. stride=stride,
  798. groups=groups,
  799. apply_act=False,
  800. **dd,
  801. ))
  802. self.conv_kxk = nn.ModuleList(convs)
  803. # Re-parameterizable scale branch
  804. self.conv_scale = None
  805. if kernel_size > 1:
  806. self.conv_scale = layers.conv_norm_act(
  807. in_chs,
  808. out_chs,
  809. kernel_size=1,
  810. stride=stride,
  811. groups=groups,
  812. apply_act=False,
  813. **dd,
  814. )
  815. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. and use_ident else nn.Identity()
  816. self.attn = nn.Identity() if layers.attn is None else layers.attn(out_chs, **dd)
  817. self.act = layers.act(inplace=True)
  818. def forward(self, x: torch.Tensor) -> torch.Tensor:
  819. """ Apply forward pass. """
  820. # Inference mode forward pass.
  821. if self.reparam_conv is not None:
  822. return self.act(self.attn(self.reparam_conv(x)))
  823. # Multi-branched train-time forward pass.
  824. # Skip branch output
  825. identity_out = 0
  826. if self.identity is not None:
  827. identity_out = self.identity(x)
  828. # Scale branch output
  829. scale_out = 0
  830. if self.conv_scale is not None:
  831. scale_out = self.conv_scale(x)
  832. # Other branches
  833. out = scale_out
  834. for ck in self.conv_kxk:
  835. out += ck(x)
  836. out = self.drop_path(out)
  837. out += identity_out
  838. return self.act(self.attn(out))
  839. def reparameterize(self):
  840. """ Following works like `RepVGG: Making VGG-style ConvNets Great Again` -
  841. https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
  842. architecture used at training time to obtain a plain CNN-like structure
  843. for inference.
  844. """
  845. if self.reparam_conv is not None:
  846. return
  847. kernel, bias = self._get_kernel_bias()
  848. self.reparam_conv = nn.Conv2d(
  849. in_channels=self.conv_kxk[0].conv.in_channels,
  850. out_channels=self.conv_kxk[0].conv.out_channels,
  851. kernel_size=self.conv_kxk[0].conv.kernel_size,
  852. stride=self.conv_kxk[0].conv.stride,
  853. padding=self.conv_kxk[0].conv.padding,
  854. dilation=self.conv_kxk[0].conv.dilation,
  855. groups=self.conv_kxk[0].conv.groups,
  856. bias=True)
  857. self.reparam_conv.weight.data = kernel
  858. self.reparam_conv.bias.data = bias
  859. # Delete un-used branches
  860. for name, para in self.named_parameters():
  861. if 'reparam_conv' in name:
  862. continue
  863. para.detach_()
  864. self.__delattr__('conv_kxk')
  865. self.__delattr__('conv_scale')
  866. self.__delattr__('identity')
  867. self.__delattr__('drop_path')
  868. def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]:
  869. """ Method to obtain re-parameterized kernel and bias.
  870. Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83
  871. """
  872. # get weights and bias of scale branch
  873. kernel_scale = 0
  874. bias_scale = 0
  875. if self.conv_scale is not None:
  876. kernel_scale, bias_scale = self._fuse_bn_tensor(self.conv_scale)
  877. # Pad scale branch kernel to match conv branch kernel size.
  878. pad = self.conv_kxk[0].conv.kernel_size[0] // 2
  879. kernel_scale = torch.nn.functional.pad(kernel_scale, [pad, pad, pad, pad])
  880. # get weights and bias of skip branch
  881. kernel_identity = 0
  882. bias_identity = 0
  883. if self.identity is not None:
  884. kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
  885. # get weights and bias of conv branches
  886. kernel_conv = 0
  887. bias_conv = 0
  888. for ix in range(self.num_conv_branches):
  889. _kernel, _bias = self._fuse_bn_tensor(self.conv_kxk[ix])
  890. kernel_conv += _kernel
  891. bias_conv += _bias
  892. kernel_final = kernel_conv + kernel_scale + kernel_identity
  893. bias_final = bias_conv + bias_scale + bias_identity
  894. return kernel_final, bias_final
  895. def _fuse_bn_tensor(self, branch) -> Tuple[torch.Tensor, torch.Tensor]:
  896. """ Method to fuse batchnorm layer with preceding conv layer.
  897. Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95
  898. """
  899. if isinstance(branch, ConvNormAct):
  900. kernel = branch.conv.weight
  901. running_mean = branch.bn.running_mean
  902. running_var = branch.bn.running_var
  903. gamma = branch.bn.weight
  904. beta = branch.bn.bias
  905. eps = branch.bn.eps
  906. else:
  907. assert isinstance(branch, nn.BatchNorm2d)
  908. if not hasattr(self, 'id_tensor'):
  909. in_chs = self.conv_kxk[0].conv.in_channels
  910. input_dim = in_chs // self.groups
  911. kernel_size = self.conv_kxk[0].conv.kernel_size
  912. kernel_value = torch.zeros_like(self.conv_kxk[0].conv.weight)
  913. for i in range(in_chs):
  914. kernel_value[i, i % input_dim, kernel_size[0] // 2, kernel_size[1] // 2] = 1
  915. self.id_tensor = kernel_value
  916. kernel = self.id_tensor
  917. running_mean = branch.running_mean
  918. running_var = branch.running_var
  919. gamma = branch.weight
  920. beta = branch.bias
  921. eps = branch.eps
  922. std = (running_var + eps).sqrt()
  923. t = (gamma / std).reshape(-1, 1, 1, 1)
  924. return kernel * t, beta - running_mean * gamma / std
  925. class SelfAttnBlock(nn.Module):
  926. """ ResNet-like Bottleneck Block - 1x1 - optional kxk - self attn - 1x1
  927. """
  928. def __init__(
  929. self,
  930. in_chs: int,
  931. out_chs: int,
  932. kernel_size: int = 3,
  933. stride: int = 1,
  934. dilation: Tuple[int, int] = (1, 1),
  935. bottle_ratio: float = 1.,
  936. group_size: Optional[int] = None,
  937. downsample: str = 'avg',
  938. extra_conv: bool = False,
  939. linear_out: bool = False,
  940. bottle_in: bool = False,
  941. post_attn_na: bool = True,
  942. feat_size: Optional[Tuple[int, int]] = None,
  943. layers: LayerFn = None,
  944. drop_block: Callable = None,
  945. drop_path_rate: float = 0.,
  946. device=None,
  947. dtype=None,
  948. ):
  949. dd = {'device': device, 'dtype': dtype}
  950. super().__init__()
  951. assert layers is not None
  952. mid_chs = make_divisible((in_chs if bottle_in else out_chs) * bottle_ratio)
  953. groups = num_groups(group_size, mid_chs)
  954. self.shortcut = create_shortcut(
  955. downsample,
  956. in_chs,
  957. out_chs,
  958. stride=stride,
  959. dilation=dilation,
  960. apply_act=False,
  961. layers=layers,
  962. **dd,
  963. )
  964. self.conv1_1x1 = layers.conv_norm_act(in_chs, mid_chs, 1, **dd)
  965. if extra_conv:
  966. self.conv2_kxk = layers.conv_norm_act(
  967. mid_chs,
  968. mid_chs,
  969. kernel_size,
  970. stride=stride,
  971. dilation=dilation[0],
  972. groups=groups,
  973. drop_layer=drop_block,
  974. **dd,
  975. )
  976. stride = 1 # striding done via conv if enabled
  977. else:
  978. self.conv2_kxk = nn.Identity()
  979. opt_kwargs = {} if feat_size is None else dict(feat_size=feat_size)
  980. # FIXME need to dilate self attn to have dilated network support, moop moop
  981. self.self_attn = layers.self_attn(mid_chs, stride=stride, **opt_kwargs, **dd)
  982. self.post_attn = layers.norm_act(mid_chs, **dd) if post_attn_na else nn.Identity()
  983. self.conv3_1x1 = layers.conv_norm_act(mid_chs, out_chs, 1, apply_act=False, **dd)
  984. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  985. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  986. def init_weights(self, zero_init_last: bool = False):
  987. if zero_init_last and self.shortcut is not None and getattr(self.conv3_1x1.bn, 'weight', None) is not None:
  988. nn.init.zeros_(self.conv3_1x1.bn.weight)
  989. if hasattr(self.self_attn, 'reset_parameters'):
  990. self.self_attn.reset_parameters()
  991. def forward(self, x):
  992. shortcut = x
  993. x = self.conv1_1x1(x)
  994. x = self.conv2_kxk(x)
  995. x = self.self_attn(x)
  996. x = self.post_attn(x)
  997. x = self.conv3_1x1(x)
  998. x = self.drop_path(x)
  999. if self.shortcut is not None:
  1000. x = x + self.shortcut(shortcut)
  1001. return self.act(x)
  1002. _block_registry = dict(
  1003. basic=BasicBlock,
  1004. bottle=BottleneckBlock,
  1005. dark=DarkBlock,
  1006. edge=EdgeBlock,
  1007. rep=RepVggBlock,
  1008. one=MobileOneBlock,
  1009. self_attn=SelfAttnBlock,
  1010. )
  1011. def register_block(block_type: str, block_fn: nn.Module):
  1012. _block_registry[block_type] = block_fn
  1013. def create_block(block: Union[str, nn.Module], **kwargs):
  1014. if isinstance(block, (nn.Module, partial)):
  1015. return block(**kwargs)
  1016. assert block in _block_registry, f'Unknown block type ({block}'
  1017. return _block_registry[block](**kwargs)
  1018. class Stem(nn.Sequential):
  1019. def __init__(
  1020. self,
  1021. in_chs: int,
  1022. out_chs: Union[int, List[int], Tuple[int, ...]],
  1023. kernel_size: int = 3,
  1024. stride: int = 4,
  1025. pool: str = 'maxpool',
  1026. num_rep: int = 3,
  1027. num_act: Optional[int] = None,
  1028. chs_decay: float = 0.5,
  1029. layers: LayerFn = None,
  1030. device=None,
  1031. dtype=None,
  1032. ):
  1033. dd = {'device': device, 'dtype': dtype}
  1034. super().__init__()
  1035. assert stride in (2, 4)
  1036. layers = layers or LayerFn()
  1037. if isinstance(out_chs, (list, tuple)):
  1038. num_rep = len(out_chs)
  1039. stem_chs = out_chs
  1040. else:
  1041. stem_chs = [round(out_chs * chs_decay ** i) for i in range(num_rep)][::-1]
  1042. self.stride = stride
  1043. self.feature_info = [] # track intermediate features
  1044. prev_feat = ''
  1045. stem_strides = [2] + [1] * (num_rep - 1)
  1046. if stride == 4 and not pool:
  1047. # set last conv in stack to be strided if stride == 4 and no pooling layer
  1048. stem_strides[-1] = 2
  1049. num_act = num_rep if num_act is None else num_act
  1050. # if num_act < num_rep, first convs in stack won't have bn + act
  1051. stem_norm_acts = [False] * (num_rep - num_act) + [True] * num_act
  1052. prev_chs = in_chs
  1053. curr_stride = 1
  1054. last_feat_idx = -1
  1055. for i, (ch, s, na) in enumerate(zip(stem_chs, stem_strides, stem_norm_acts)):
  1056. layer_fn = layers.conv_norm_act if na else create_conv2d
  1057. conv_name = f'conv{i + 1}'
  1058. if i > 0 and s > 1:
  1059. last_feat_idx = i - 1
  1060. self.feature_info.append(dict(num_chs=prev_chs, reduction=curr_stride, module=prev_feat, stage=0))
  1061. self.add_module(conv_name, layer_fn(prev_chs, ch, kernel_size=kernel_size, stride=s, **dd))
  1062. prev_chs = ch
  1063. curr_stride *= s
  1064. prev_feat = conv_name
  1065. if pool:
  1066. pool = pool.lower()
  1067. assert pool in ('max', 'maxpool', 'avg', 'avgpool', 'max2', 'avg2')
  1068. last_feat_idx = i
  1069. self.feature_info.append(dict(num_chs=prev_chs, reduction=curr_stride, module=prev_feat, stage=0))
  1070. if pool == 'max2':
  1071. self.add_module('pool', nn.MaxPool2d(2))
  1072. elif pool == 'avg2':
  1073. self.add_module('pool', nn.AvgPool2d(2))
  1074. elif 'max' in pool:
  1075. self.add_module('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  1076. elif 'avg' in pool:
  1077. self.add_module('pool', nn.AvgPool2d(kernel_size=3, stride=2, padding=1, count_include_pad=False))
  1078. curr_stride *= 2
  1079. prev_feat = 'pool'
  1080. self.last_feat_idx = last_feat_idx if last_feat_idx >= 0 else None
  1081. self.feature_info.append(dict(num_chs=prev_chs, reduction=curr_stride, module=prev_feat, stage=0))
  1082. assert curr_stride == stride
  1083. def forward_intermediates(self, x) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
  1084. intermediate: Optional[torch.Tensor] = None
  1085. for i, m in enumerate(self):
  1086. x = m(x)
  1087. if self.last_feat_idx is not None and i == self.last_feat_idx:
  1088. intermediate = x
  1089. return x, intermediate
  1090. def create_byob_stem(
  1091. in_chs: int,
  1092. out_chs: int,
  1093. stem_type: str = '',
  1094. pool_type: str = '',
  1095. feat_prefix: str = 'stem',
  1096. layers: LayerFn = None,
  1097. device=None,
  1098. dtype=None,
  1099. ):
  1100. dd = {'device': device, 'dtype': dtype}
  1101. layers = layers or LayerFn()
  1102. assert stem_type in ('', 'quad', 'quad2', 'tiered', 'deep', 'rep', 'one', '7x7', '3x3')
  1103. if 'quad' in stem_type:
  1104. # based on NFNet stem, stack of 4 3x3 convs
  1105. num_act = 2 if 'quad2' in stem_type else None
  1106. stem = Stem(in_chs, out_chs, num_rep=4, num_act=num_act, pool=pool_type, layers=layers, **dd)
  1107. elif 'tiered' in stem_type:
  1108. # 3x3 stack of 3 convs as in my ResNet-T
  1109. stem = Stem(in_chs, (3 * out_chs // 8, out_chs // 2, out_chs), pool=pool_type, layers=layers, **dd)
  1110. elif 'deep' in stem_type:
  1111. # 3x3 stack of 3 convs as in ResNet-D
  1112. stem = Stem(in_chs, out_chs, num_rep=3, chs_decay=1.0, pool=pool_type, layers=layers, **dd)
  1113. elif 'rep' in stem_type:
  1114. stem = RepVggBlock(in_chs, out_chs, stride=2, layers=layers, **dd)
  1115. elif 'one' in stem_type:
  1116. stem = MobileOneBlock(in_chs, out_chs, kernel_size=3, stride=2, layers=layers, **dd)
  1117. elif '7x7' in stem_type:
  1118. # 7x7 stem conv as in ResNet
  1119. if pool_type:
  1120. stem = Stem(in_chs, out_chs, 7, num_rep=1, pool=pool_type, layers=layers, **dd)
  1121. else:
  1122. stem = layers.conv_norm_act(in_chs, out_chs, 7, stride=2, **dd)
  1123. else:
  1124. if isinstance(out_chs, (tuple, list)):
  1125. stem = Stem(in_chs, out_chs, 3, pool=pool_type, layers=layers, **dd)
  1126. else:
  1127. # 3x3 stem conv as in RegNet is the default
  1128. if pool_type:
  1129. stem = Stem(in_chs, out_chs, 3, num_rep=1, pool=pool_type, layers=layers, **dd)
  1130. else:
  1131. stem = layers.conv_norm_act(in_chs, out_chs, 3, stride=2, **dd)
  1132. if isinstance(stem, Stem):
  1133. feature_info = [dict(f, module='.'.join([feat_prefix, f['module']])) for f in stem.feature_info]
  1134. else:
  1135. feature_info = [dict(num_chs=out_chs, reduction=2, module=feat_prefix, stage=0)]
  1136. return stem, feature_info
  1137. def reduce_feat_size(feat_size, stride=2):
  1138. return None if feat_size is None else tuple([s // stride for s in feat_size])
  1139. def override_kwargs(block_kwargs, model_kwargs):
  1140. """ Override model level attn/self-attn/block kwargs w/ block level
  1141. NOTE: kwargs are NOT merged across levels, block_kwargs will fully replace model_kwargs
  1142. for the block if set to anything that isn't None.
  1143. i.e. an empty block_kwargs dict will remove kwargs set at model level for that block
  1144. """
  1145. out_kwargs = block_kwargs if block_kwargs is not None else model_kwargs
  1146. return out_kwargs or {} # make sure None isn't returned
  1147. def update_block_kwargs(block_kwargs: Dict[str, Any], block_cfg: ByoBlockCfg, model_cfg: ByoModelCfg, ):
  1148. layer_fns = block_kwargs['layers']
  1149. # override attn layer / args with block local config
  1150. attn_set = block_cfg.attn_layer is not None
  1151. if attn_set or block_cfg.attn_kwargs is not None:
  1152. # override attn layer config
  1153. if attn_set and not block_cfg.attn_layer:
  1154. # empty string for attn_layer type will disable attn for this block
  1155. attn_layer = None
  1156. else:
  1157. attn_kwargs = override_kwargs(block_cfg.attn_kwargs, model_cfg.attn_kwargs)
  1158. attn_layer = block_cfg.attn_layer or model_cfg.attn_layer
  1159. attn_layer = partial(get_attn(attn_layer), **attn_kwargs) if attn_layer is not None else None
  1160. layer_fns = replace(layer_fns, attn=attn_layer)
  1161. # override self-attn layer / args with block local cfg
  1162. self_attn_set = block_cfg.self_attn_layer is not None
  1163. if self_attn_set or block_cfg.self_attn_kwargs is not None:
  1164. # override attn layer config
  1165. if self_attn_set and not block_cfg.self_attn_layer: # attn_layer == ''
  1166. # empty string for self_attn_layer type will disable attn for this block
  1167. self_attn_layer = None
  1168. else:
  1169. self_attn_kwargs = override_kwargs(block_cfg.self_attn_kwargs, model_cfg.self_attn_kwargs)
  1170. self_attn_layer = block_cfg.self_attn_layer or model_cfg.self_attn_layer
  1171. self_attn_layer = partial(get_attn(self_attn_layer), **self_attn_kwargs) \
  1172. if self_attn_layer is not None else None
  1173. layer_fns = replace(layer_fns, self_attn=self_attn_layer)
  1174. block_kwargs['layers'] = layer_fns
  1175. # add additional block_kwargs specified in block_cfg or model_cfg, precedence to block if set
  1176. block_kwargs.update(override_kwargs(block_cfg.block_kwargs, model_cfg.block_kwargs))
  1177. def create_byob_stages(
  1178. cfg: ByoModelCfg,
  1179. drop_path_rate: float,
  1180. output_stride: int,
  1181. stem_feat: Dict[str, Any],
  1182. feat_size: Optional[int] = None,
  1183. layers: Optional[LayerFn] = None,
  1184. block_kwargs_fn: Optional[Callable] = update_block_kwargs,
  1185. device=None,
  1186. dtype=None,
  1187. ):
  1188. layers = layers or LayerFn()
  1189. feature_info = []
  1190. block_cfgs = [expand_blocks_cfg(s) for s in cfg.blocks]
  1191. depths = [sum([bc.d for bc in stage_bcs]) for stage_bcs in block_cfgs]
  1192. dpr = calculate_drop_path_rates(drop_path_rate, depths, stagewise=True)
  1193. dilation = 1
  1194. net_stride = stem_feat['reduction']
  1195. prev_chs = stem_feat['num_chs']
  1196. prev_feat = stem_feat
  1197. stages = []
  1198. for stage_idx, stage_block_cfgs in enumerate(block_cfgs):
  1199. stride = stage_block_cfgs[0].s
  1200. if stride != 1 and prev_feat:
  1201. feature_info.append(prev_feat)
  1202. if net_stride >= output_stride and stride > 1:
  1203. dilation *= stride
  1204. stride = 1
  1205. net_stride *= stride
  1206. first_dilation = 1 if dilation in (1, 2) else 2
  1207. blocks = []
  1208. for block_idx, block_cfg in enumerate(stage_block_cfgs):
  1209. out_chs = make_divisible(block_cfg.c * cfg.width_factor)
  1210. group_size = block_cfg.gs
  1211. if isinstance(group_size, Callable):
  1212. group_size = group_size(out_chs, block_idx)
  1213. block_kwargs = dict( # Blocks used in this model must accept these arguments
  1214. in_chs=prev_chs,
  1215. out_chs=out_chs,
  1216. stride=stride if block_idx == 0 else 1,
  1217. dilation=(first_dilation, dilation),
  1218. group_size=group_size,
  1219. bottle_ratio=block_cfg.br,
  1220. downsample=cfg.downsample,
  1221. drop_path_rate=dpr[stage_idx][block_idx],
  1222. layers=layers,
  1223. device=device,
  1224. dtype=dtype,
  1225. )
  1226. if block_cfg.type in ('self_attn',):
  1227. # add feat_size arg for blocks that support/need it
  1228. block_kwargs['feat_size'] = feat_size
  1229. block_kwargs_fn(block_kwargs, block_cfg=block_cfg, model_cfg=cfg)
  1230. blocks += [create_block(block_cfg.type, **block_kwargs)]
  1231. first_dilation = dilation
  1232. prev_chs = out_chs
  1233. if stride > 1 and block_idx == 0:
  1234. feat_size = reduce_feat_size(feat_size, stride)
  1235. stages += [nn.Sequential(*blocks)]
  1236. prev_feat = dict(num_chs=prev_chs, reduction=net_stride, module=f'stages.{stage_idx}', stage=stage_idx + 1)
  1237. feature_info.append(prev_feat)
  1238. return nn.Sequential(*stages), feature_info, feat_size
  1239. def get_layer_fns(cfg: ByoModelCfg, allow_aa: bool = True):
  1240. act = get_act_layer(cfg.act_layer)
  1241. norm_act = get_norm_act_layer(norm_layer=cfg.norm_layer, act_layer=act)
  1242. if cfg.aa_layer and allow_aa:
  1243. conv_norm_act = partial(ConvNormAct, norm_layer=cfg.norm_layer, act_layer=act, aa_layer=cfg.aa_layer)
  1244. else:
  1245. conv_norm_act = partial(ConvNormAct, norm_layer=cfg.norm_layer, act_layer=act)
  1246. attn = partial(get_attn(cfg.attn_layer), **cfg.attn_kwargs) if cfg.attn_layer else None
  1247. self_attn = partial(get_attn(cfg.self_attn_layer), **cfg.self_attn_kwargs) if cfg.self_attn_layer else None
  1248. layer_fn = LayerFn(conv_norm_act=conv_norm_act, norm_act=norm_act, act=act, attn=attn, self_attn=self_attn)
  1249. return layer_fn
class ByobNet(nn.Module):
    """Bring-your-own-blocks Network.

    A flexible network backbone that allows building model stem + blocks via
    dataclass cfg definition w/ factory functions for module instantiation.

    Current assumption is that both stem and blocks are in conv-bn-act order (w/ block ending in act).

    Attributes set by ``__init__``:
        stem: Stem module returned by ``create_byob_stem``.
        stages: Stage container returned by ``create_byob_stages``.
        final_conv: 1x1 conv-norm-act when ``cfg.num_features`` is truthy, else ``nn.Identity``.
        head: Pooling + classifier head selected by ``cfg.head_type``.
        feature_info: List of per-feature dicts (stem, stages, then a ``final_conv`` entry).
        stage_ends: The ``'stage'`` value of every ``feature_info`` entry.
        num_features: Channel count at the output of ``final_conv``.
        head_hidden_size: ``num_features``, or the hidden / embed size reported by the head.
    """
    def __init__(
            self,
            cfg: ByoModelCfg,
            num_classes: int = 1000,
            in_chans: int = 3,
            global_pool: Optional[str] = None,
            output_stride: int = 32,
            img_size: Optional[Union[int, Tuple[int, int]]] = None,
            drop_rate: float = 0.,
            drop_path_rate: float = 0.,
            zero_init_last: bool = True,
            device=None,
            dtype=None,
            **kwargs,
    ):
        """
        Args:
            cfg: Model architecture configuration.
            num_classes: Number of classifier classes.
            in_chans: Number of input channels.
            global_pool: Global pooling type. When None a per-head default is used
                ('avg' for the classifier / mlp heads, 'token' for the attention heads).
            output_stride: Output stride of network, one of (8, 16, 32).
            img_size: Image size for fixed image size models (i.e. self-attn).
            drop_rate: Classifier dropout rate.
            drop_path_rate: Stochastic depth drop-path rate.
            zero_init_last: Zero-init last weight of residual path.
            device: Forwarded as a keyword argument to the stem, stage, final conv and head constructors.
            dtype: Forwarded alongside ``device`` to the same constructors.
            **kwargs: Extra kwargs overlayed onto cfg.
        """
        super().__init__()
        # factory kwargs handed to every sub-module constructor below
        dd = {'device': device, 'dtype': dtype}
        self.num_classes = num_classes
        self.drop_rate = drop_rate
        self.grad_checkpointing = False

        cfg = replace(cfg, **kwargs)  # overlay kwargs onto cfg
        stem_layers = get_layer_fns(cfg, allow_aa=False)  # keep aa off for stem-layers
        stage_layers = get_layer_fns(cfg)
        if cfg.fixed_input_size:
            assert img_size is not None, 'img_size argument is required for fixed input size model'
        # feat_size stays None when no img_size was given
        feat_size = to_2tuple(img_size) if img_size is not None else None

        self.feature_info = []
        # stem channels are scaled by width_factor; a scalar stem_chs falls back to the first block's `c` when unset
        if isinstance(cfg.stem_chs, (list, tuple)):
            stem_chs = [int(round(c * cfg.width_factor)) for c in cfg.stem_chs]
        else:
            stem_chs = int(round((cfg.stem_chs or cfg.blocks[0].c) * cfg.width_factor))
        self.stem, stem_feat = create_byob_stem(
            in_chs=in_chans,
            out_chs=stem_chs,
            stem_type=cfg.stem_type,
            pool_type=cfg.stem_pool,
            layers=stem_layers,
            **dd,
        )
        # the last stem feature entry is not recorded here; it is passed on to create_byob_stages instead
        self.feature_info.extend(stem_feat[:-1])
        feat_size = reduce_feat_size(feat_size, stride=stem_feat[-1]['reduction'])

        self.stages, stage_feat, feat_size = create_byob_stages(
            cfg,
            drop_path_rate,
            output_stride,
            stem_feat[-1],
            layers=stage_layers,
            feat_size=feat_size,
            **dd,
        )
        # likewise, the last stage feature entry is replaced by the 'final_conv' entry appended below
        self.feature_info.extend(stage_feat[:-1])
        reduction = stage_feat[-1]['reduction']

        prev_chs = stage_feat[-1]['num_chs']
        if cfg.num_features:
            self.num_features = int(round(cfg.width_factor * cfg.num_features))
            self.final_conv = stage_layers.conv_norm_act(prev_chs, self.num_features, 1, **dd)
        else:
            self.num_features = prev_chs
            self.final_conv = nn.Identity()
        self.feature_info += [
            dict(num_chs=self.num_features, reduction=reduction, module='final_conv', stage=len(self.stages))]
        self.stage_ends = [f['stage'] for f in self.feature_info]

        self.head_hidden_size = self.num_features
        assert cfg.head_type in ('', 'classifier', 'mlp', 'attn_abs', 'attn_rot')
        if cfg.head_type == 'mlp':
            if global_pool is None:
                global_pool = 'avg'
            self.head = NormMlpClassifierHead(
                self.num_features,
                num_classes,
                hidden_size=cfg.head_hidden_size,
                pool_type=global_pool,
                norm_layer=cfg.norm_layer,
                act_layer=cfg.act_layer,
                drop_rate=self.drop_rate,
                **dd,
            )
            self.head_hidden_size = self.head.hidden_size
        elif cfg.head_type == 'attn_abs':
            if global_pool is None:
                global_pool = 'token'
            assert global_pool in ('', 'token')
            self.head = AttentionPool2d(
                self.num_features,
                embed_dim=cfg.head_hidden_size,
                out_features=num_classes,
                feat_size=feat_size,
                pool_type=global_pool,
                drop_rate=self.drop_rate,
                qkv_separate=True,
                **dd,
            )
            self.head_hidden_size = self.head.embed_dim
        elif cfg.head_type == 'attn_rot':
            if global_pool is None:
                global_pool = 'token'
            assert global_pool in ('', 'token')
            self.head = RotAttentionPool2d(
                self.num_features,
                embed_dim=cfg.head_hidden_size,
                out_features=num_classes,
                ref_feat_size=feat_size,
                pool_type=global_pool,
                drop_rate=self.drop_rate,
                qkv_separate=True,
                **dd,
            )
            self.head_hidden_size = self.head.embed_dim
        else:
            # head_type '' or 'classifier': plain pooling + linear classifier, no hidden layer allowed
            if global_pool is None:
                global_pool = 'avg'
            assert cfg.head_hidden_size is None
            self.head = ClassifierHead(
                self.num_features,
                num_classes,
                pool_type=global_pool,
                drop_rate=self.drop_rate,
                **dd,
            )
        self.global_pool = global_pool

        # init weights
        named_apply(partial(_init_weights, zero_init_last=zero_init_last), self)

    @torch.jit.ignore
    def group_matcher(self, coarse: bool = False) -> Dict[str, Any]:
        """Group matcher for parameter groups.

        Args:
            coarse: Whether to use coarse grouping (match on the stage index only
                instead of on stage index + block index).

        Returns:
            Dictionary mapping group names to patterns.
        """
        matcher = dict(
            stem=r'^stem',
            blocks=[
                (r'^stages\.(\d+)' if coarse else r'^stages\.(\d+)\.(\d+)', None),
                # final_conv is given a fixed, very large ordinal
                (r'^final_conv', (99999,))
            ]
        )
        return matcher

    @torch.jit.ignore
    def set_grad_checkpointing(self, enable: bool = True) -> None:
        """Enable or disable gradient checkpointing.

        The flag is read by ``forward_features`` and ``forward_intermediates``.

        Args:
            enable: Whether to enable gradient checkpointing.
        """
        self.grad_checkpointing = enable

    @torch.jit.ignore
    def get_classifier(self) -> nn.Module:
        """Get classifier module.

        Returns:
            Classifier module (the ``fc`` attribute of the head).
        """
        # NOTE(review): assumes every head type built in __init__ exposes `.fc` -- confirm for the attention-pool heads
        return self.head.fc

    def reset_classifier(self, num_classes: int, global_pool: Optional[str] = None) -> None:
        """Reset classifier.

        Updates ``self.num_classes`` and delegates to ``self.head.reset``.
        ``self.global_pool`` is not modified here.

        Args:
            num_classes: Number of classes for new classifier.
            global_pool: Global pooling type.
        """
        self.num_classes = num_classes
        self.head.reset(num_classes, global_pool)

    def forward_intermediates(
            self,
            x: torch.Tensor,
            indices: Optional[Union[int, List[int]]] = None,
            norm: bool = False,
            stop_early: bool = False,
            output_fmt: str = 'NCHW',
            intermediates_only: bool = False,
            exclude_final_conv: bool = False,
    ) -> Union[List[torch.Tensor], Tuple[torch.Tensor, List[torch.Tensor]]]:
        """ Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates (accepted but not read by this method)
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
            exclude_final_conv: Exclude final_conv from last intermediate

        Returns:
            The list of intermediates when ``intermediates_only`` is set, otherwise a
            tuple of (final feature tensor, list of intermediates).
        """
        assert output_fmt in ('NCHW',), 'Output shape must be NCHW.'
        intermediates = []
        take_indices, max_index = feature_take_indices(len(self.stage_ends), indices)
        # translate positions within feature_info into the stage numbers recorded there
        take_indices = [self.stage_ends[i] for i in take_indices]
        max_index = self.stage_ends[max_index]
        # forward pass
        feat_idx = 0  # stem is index 0
        if hasattr(self.stem, 'forward_intermediates'):
            # returns last intermediate features in stem (before final stride in stride > 2 stems)
            x, x_inter = self.stem.forward_intermediates(x)
        else:
            x, x_inter = self.stem(x), None
        if feat_idx in take_indices:
            intermediates.append(x if x_inter is None else x_inter)
        last_idx = self.stage_ends[-1]

        if torch.jit.is_scripting() or not stop_early:  # can't slice blocks in torchscript
            stages = self.stages
        else:
            stages = self.stages[:max_index]
        for stage in stages:
            feat_idx += 1
            if self.grad_checkpointing and not torch.jit.is_scripting():
                x = checkpoint_seq(stage, x)
            else:
                x = stage(x)
            if not exclude_final_conv and feat_idx == last_idx:
                # default feature_info for this model uses final_conv as the last feature output (if present)
                x = self.final_conv(x)
            if feat_idx in take_indices:
                intermediates.append(x)

        if intermediates_only:
            return intermediates

        # final_conv was held back from the last intermediate; apply it now for the returned features
        if exclude_final_conv and feat_idx == last_idx:
            x = self.final_conv(x)

        return x, intermediates

    def prune_intermediate_layers(
            self,
            indices: Union[int, List[int]] = 1,
            prune_norm: bool = False,
            prune_head: bool = True,
    ) -> List[int]:
        """Prune layers not required for specified intermediates.

        Args:
            indices: Indices of intermediate layers to keep.
            prune_norm: Whether to prune normalization layer (accepted but not read by this method).
            prune_head: Whether to prune the classifier head.

        Returns:
            List of indices that were kept.
        """
        take_indices, max_index = feature_take_indices(len(self.stage_ends), indices)
        max_index = self.stage_ends[max_index]
        self.stages = self.stages[:max_index]  # truncate blocks w/ stem as idx 0
        # once the last stage is gone, final_conv is replaced by a pass-through
        if max_index < self.stage_ends[-1]:
            self.final_conv = nn.Identity()
        if prune_head:
            self.reset_classifier(0, '')
        return take_indices

    def forward_features(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass through feature extraction.

        Runs stem, stages (through ``checkpoint_seq`` when gradient checkpointing is
        enabled and not scripting) and ``final_conv``.

        Args:
            x: Input tensor.

        Returns:
            Feature tensor.
        """
        x = self.stem(x)
        if self.grad_checkpointing and not torch.jit.is_scripting():
            x = checkpoint_seq(self.stages, x)
        else:
            x = self.stages(x)
        x = self.final_conv(x)
        return x

    def forward_head(self, x: torch.Tensor, pre_logits: bool = False) -> torch.Tensor:
        """Forward pass through head.

        Args:
            x: Input features.
            pre_logits: Return features before final linear layer.

        Returns:
            Classification logits or features.
        """
        # the pre_logits keyword is only passed to the head when it is True
        return self.head(x, pre_logits=pre_logits) if pre_logits else self.head(x)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass.

        Args:
            x: Input tensor.

        Returns:
            Output logits.
        """
        x = self.forward_features(x)
        x = self.forward_head(x)
        return x
  1540. def _init_weights(module: nn.Module, name: str = '', zero_init_last: bool = False) -> None:
  1541. """Initialize weights.
  1542. Args:
  1543. module: Module to initialize.
  1544. name: Module name.
  1545. zero_init_last: Zero-initialize last layer.
  1546. """
  1547. if isinstance(module, nn.Conv2d):
  1548. fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
  1549. fan_out //= module.groups
  1550. module.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
  1551. if module.bias is not None:
  1552. module.bias.data.zero_()
  1553. elif isinstance(module, nn.Linear):
  1554. nn.init.normal_(module.weight, mean=0.0, std=0.01)
  1555. if module.bias is not None:
  1556. nn.init.zeros_(module.bias)
  1557. elif isinstance(module, nn.BatchNorm2d):
  1558. nn.init.ones_(module.weight)
  1559. nn.init.zeros_(module.bias)
  1560. elif hasattr(module, 'init_weights'):
  1561. module.init_weights(zero_init_last=zero_init_last)
# Registry of architecture configs, keyed by variant name; `_create_byobnet` looks variants up here.
# ByoBlockCfg field abbreviations as used by the comments below: d = repeats, gs = group size.
# NOTE(review): c / s / br presumably mean out channels / stride / bottleneck ratio -- confirm against ByoBlockCfg.
model_cfgs = dict(
    # gernet_*: two 'basic' stages followed by three 'bottle' stages, no stem pool, wide pre-classifier features
    gernet_l=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='basic', d=1, c=128, s=2, gs=0, br=1.),
            ByoBlockCfg(type='basic', d=2, c=192, s=2, gs=0, br=1.),
            ByoBlockCfg(type='bottle', d=6, c=640, s=2, gs=0, br=1 / 4),
            ByoBlockCfg(type='bottle', d=5, c=640, s=2, gs=1, br=3.),
            ByoBlockCfg(type='bottle', d=4, c=640, s=1, gs=1, br=3.),
        ),
        stem_chs=32,
        stem_pool=None,
        num_features=2560,
    ),
    gernet_m=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='basic', d=1, c=128, s=2, gs=0, br=1.),
            ByoBlockCfg(type='basic', d=2, c=192, s=2, gs=0, br=1.),
            ByoBlockCfg(type='bottle', d=6, c=640, s=2, gs=0, br=1 / 4),
            ByoBlockCfg(type='bottle', d=4, c=640, s=2, gs=1, br=3.),
            ByoBlockCfg(type='bottle', d=1, c=640, s=1, gs=1, br=3.),
        ),
        stem_chs=32,
        stem_pool=None,
        num_features=2560,
    ),
    gernet_s=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='basic', d=1, c=48, s=2, gs=0, br=1.),
            ByoBlockCfg(type='basic', d=3, c=48, s=2, gs=0, br=1.),
            ByoBlockCfg(type='bottle', d=7, c=384, s=2, gs=0, br=1 / 4),
            ByoBlockCfg(type='bottle', d=2, c=560, s=2, gs=1, br=3.),
            ByoBlockCfg(type='bottle', d=1, c=256, s=1, gs=1, br=3.),
        ),
        stem_chs=13,
        stem_pool=None,
        num_features=1920,
    ),

    # repvgg_*: 'rep' stem, block configs generated by _rep_vgg_bcfg from per-stage depths / width factors
    repvgg_a0=ByoModelCfg(
        blocks=_rep_vgg_bcfg(d=(2, 4, 14, 1), wf=(0.75, 0.75, 0.75, 2.5)),
        stem_type='rep',
        stem_chs=48,
    ),
    repvgg_a1=ByoModelCfg(
        blocks=_rep_vgg_bcfg(d=(2, 4, 14, 1), wf=(1, 1, 1, 2.5)),
        stem_type='rep',
        stem_chs=64,
    ),
    repvgg_a2=ByoModelCfg(
        blocks=_rep_vgg_bcfg(d=(2, 4, 14, 1), wf=(1.5, 1.5, 1.5, 2.75)),
        stem_type='rep',
        stem_chs=64,
    ),
    repvgg_b0=ByoModelCfg(
        blocks=_rep_vgg_bcfg(wf=(1., 1., 1., 2.5)),
        stem_type='rep',
        stem_chs=64,
    ),
    repvgg_b1=ByoModelCfg(
        blocks=_rep_vgg_bcfg(wf=(2., 2., 2., 4.)),
        stem_type='rep',
        stem_chs=64,
    ),
    repvgg_b1g4=ByoModelCfg(
        blocks=_rep_vgg_bcfg(wf=(2., 2., 2., 4.), groups=4),
        stem_type='rep',
        stem_chs=64,
    ),
    repvgg_b2=ByoModelCfg(
        blocks=_rep_vgg_bcfg(wf=(2.5, 2.5, 2.5, 5.)),
        stem_type='rep',
        stem_chs=64,
    ),
    repvgg_b2g4=ByoModelCfg(
        blocks=_rep_vgg_bcfg(wf=(2.5, 2.5, 2.5, 5.), groups=4),
        stem_type='rep',
        stem_chs=64,
    ),
    repvgg_b3=ByoModelCfg(
        blocks=_rep_vgg_bcfg(wf=(3., 3., 3., 5.)),
        stem_type='rep',
        stem_chs=64,
    ),
    repvgg_b3g4=ByoModelCfg(
        blocks=_rep_vgg_bcfg(wf=(3., 3., 3., 5.), groups=4),
        stem_type='rep',
        stem_chs=64,
    ),
    # deeper RepVGG variant (8, 14, 24, 1 repeats) with SE attention
    repvgg_d2se=ByoModelCfg(
        blocks=_rep_vgg_bcfg(d=(8, 14, 24, 1), wf=(2.5, 2.5, 2.5, 5.)),
        stem_type='rep',
        stem_chs=64,
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.0625, rd_divisor=1),
    ),

    # 4 x conv stem w/ 2 act, no maxpool, 2,4,6,4 repeats, group size 32 in first 3 blocks
    # DW convs in last block, 2048 pre-FC, silu act
    resnet51q=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=4, c=512, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=6, c=1536, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=4, c=1536, s=2, gs=1, br=1.0),
        ),
        stem_chs=128,
        stem_type='quad2',
        stem_pool=None,
        num_features=2048,
        act_layer='silu',
    ),

    # 4 x conv stem w/ 4 act, no maxpool, 1,4,6,4 repeats, edge block first, group size 32 in next 2 blocks
    # DW convs in last block, 4 conv for each bottle block, 2048 pre-FC, silu act
    resnet61q=ByoModelCfg(
        blocks=(
            # empty block_kwargs on the edge stage, separate from the model-level extra_conv=True below
            ByoBlockCfg(type='edge', d=1, c=256, s=1, gs=0, br=1.0, block_kwargs=dict()),
            ByoBlockCfg(type='bottle', d=4, c=512, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=6, c=1536, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=4, c=1536, s=2, gs=1, br=1.0),
        ),
        stem_chs=128,
        stem_type='quad',
        stem_pool=None,
        num_features=2048,
        act_layer='silu',
        block_kwargs=dict(extra_conv=True),
    ),

    # A series of ResNeXt-26 models w/ one of none, GC, SE, ECA, BAT attn, group size 32, SiLU act,
    # and a tiered stem w/ maxpool
    resnext26ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='maxpool',
        act_layer='silu',
    ),
    gcresnext26ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='maxpool',
        act_layer='silu',
        attn_layer='gca',
    ),
    seresnext26ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='maxpool',
        act_layer='silu',
        attn_layer='se',
    ),
    eca_resnext26ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='maxpool',
        act_layer='silu',
        attn_layer='eca',
    ),
    bat_resnext26ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='maxpool',
        act_layer='silu',
        attn_layer='bat',
        attn_kwargs=dict(block_size=8)
    ),

    # ResNet-32 (2, 3, 3, 2) models w/ no attn, no groups, SiLU act, no pre-fc feat layer, tiered stem w/o maxpool
    resnet32ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='',
        num_features=0,
        act_layer='silu',
    ),

    # ResNet-33 (2, 3, 3, 2) models w/ no attn, no groups, SiLU act, 1280 pre-FC feat, tiered stem w/o maxpool
    resnet33ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='',
        num_features=1280,
        act_layer='silu',
    ),

    # A series of ResNet-33 (2, 3, 3, 2) models w/ one of GC, SE, ECA attn, no groups, SiLU act, 1280 pre-FC feat
    # and a tiered stem w/ no maxpool
    gcresnet33ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='',
        num_features=1280,
        act_layer='silu',
        attn_layer='gca',
    ),
    seresnet33ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='',
        num_features=1280,
        act_layer='silu',
        attn_layer='se',
    ),
    eca_resnet33ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='',
        num_features=1280,
        act_layer='silu',
        attn_layer='eca',
    ),

    # ResNet-50 style (3, 4, 6, 3 repeats) w/ GC attn, tiered stem w/o pool; act_layer left at the cfg default
    gcresnet50t=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
            ByoBlockCfg(type='bottle', d=4, c=512, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=6, c=1024, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=2048, s=2, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='',
        attn_layer='gca',
    ),

    # ResNeXt-50 style (3, 4, 6, 3 repeats) w/ GC attn, group size 32, SiLU act, tiered stem w/ maxpool
    gcresnext50ts=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=256, s=1, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=4, c=512, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=6, c=1024, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=2048, s=2, gs=32, br=0.25),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='maxpool',
        act_layer='silu',
        attn_layer='gca',
    ),

    # experimental models, closer to a RegNetZ than a ResNet. Similar to EfficientNets but w/ groups instead of DW
    regnetz_b16=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=48, s=2, gs=16, br=3),
            ByoBlockCfg(type='bottle', d=6, c=96, s=2, gs=16, br=3),
            ByoBlockCfg(type='bottle', d=12, c=192, s=2, gs=16, br=3),
            ByoBlockCfg(type='bottle', d=2, c=288, s=2, gs=16, br=3),
        ),
        stem_chs=32,
        stem_pool='',
        downsample='',
        num_features=1536,
        act_layer='silu',
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.25),
        block_kwargs=dict(bottle_in=True, linear_out=True),
    ),
    regnetz_c16=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=48, s=2, gs=16, br=4),
            ByoBlockCfg(type='bottle', d=6, c=96, s=2, gs=16, br=4),
            ByoBlockCfg(type='bottle', d=12, c=192, s=2, gs=16, br=4),
            ByoBlockCfg(type='bottle', d=2, c=288, s=2, gs=16, br=4),
        ),
        stem_chs=32,
        stem_pool='',
        downsample='',
        num_features=1536,
        act_layer='silu',
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.25),
        block_kwargs=dict(bottle_in=True, linear_out=True),
    ),
    regnetz_d32=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=64, s=1, gs=32, br=4),
            ByoBlockCfg(type='bottle', d=6, c=128, s=2, gs=32, br=4),
            ByoBlockCfg(type='bottle', d=12, c=256, s=2, gs=32, br=4),
            ByoBlockCfg(type='bottle', d=3, c=384, s=2, gs=32, br=4),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='',
        downsample='',
        num_features=1792,
        act_layer='silu',
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.25),
        block_kwargs=dict(bottle_in=True, linear_out=True),
    ),
    regnetz_d8=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=64, s=1, gs=8, br=4),
            ByoBlockCfg(type='bottle', d=6, c=128, s=2, gs=8, br=4),
            ByoBlockCfg(type='bottle', d=12, c=256, s=2, gs=8, br=4),
            ByoBlockCfg(type='bottle', d=3, c=384, s=2, gs=8, br=4),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='',
        downsample='',
        num_features=1792,
        act_layer='silu',
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.25),
        block_kwargs=dict(bottle_in=True, linear_out=True),
    ),
    regnetz_e8=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=96, s=1, gs=8, br=4),
            ByoBlockCfg(type='bottle', d=8, c=192, s=2, gs=8, br=4),
            ByoBlockCfg(type='bottle', d=16, c=384, s=2, gs=8, br=4),
            ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=8, br=4),
        ),
        stem_chs=64,
        stem_type='tiered',
        stem_pool='',
        downsample='',
        num_features=2048,
        act_layer='silu',
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.25),
        block_kwargs=dict(bottle_in=True, linear_out=True),
    ),

    # experimental EvoNorm configs
    regnetz_b16_evos=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=48, s=2, gs=16, br=3),
            ByoBlockCfg(type='bottle', d=6, c=96, s=2, gs=16, br=3),
            ByoBlockCfg(type='bottle', d=12, c=192, s=2, gs=16, br=3),
            ByoBlockCfg(type='bottle', d=2, c=288, s=2, gs=16, br=3),
        ),
        stem_chs=32,
        stem_pool='',
        downsample='',
        num_features=1536,
        act_layer='silu',
        norm_layer=partial(EvoNorm2dS0a, group_size=16),
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.25),
        block_kwargs=dict(bottle_in=True, linear_out=True),
    ),
    regnetz_c16_evos=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=2, c=48, s=2, gs=16, br=4),
            ByoBlockCfg(type='bottle', d=6, c=96, s=2, gs=16, br=4),
            ByoBlockCfg(type='bottle', d=12, c=192, s=2, gs=16, br=4),
            ByoBlockCfg(type='bottle', d=2, c=288, s=2, gs=16, br=4),
        ),
        stem_chs=32,
        stem_pool='',
        downsample='',
        num_features=1536,
        act_layer='silu',
        norm_layer=partial(EvoNorm2dS0a, group_size=16),
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.25),
        block_kwargs=dict(bottle_in=True, linear_out=True),
    ),
    # note: uses the 'deep' stem, unlike regnetz_d8 which uses 'tiered'
    regnetz_d8_evos=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=64, s=1, gs=8, br=4),
            ByoBlockCfg(type='bottle', d=6, c=128, s=2, gs=8, br=4),
            ByoBlockCfg(type='bottle', d=12, c=256, s=2, gs=8, br=4),
            ByoBlockCfg(type='bottle', d=3, c=384, s=2, gs=8, br=4),
        ),
        stem_chs=64,
        stem_type='deep',
        stem_pool='',
        downsample='',
        num_features=1792,
        act_layer='silu',
        norm_layer=partial(EvoNorm2dS0a, group_size=16),
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.25),
        block_kwargs=dict(bottle_in=True, linear_out=True),
    ),

    # mobileone_*: 'one' stem, block configs generated by _mobileone_bcfg from per-stage width factors
    mobileone_s0=ByoModelCfg(
        blocks=_mobileone_bcfg(wf=(0.75, 1.0, 1.0, 2.), num_conv_branches=4),
        stem_type='one',
        stem_chs=48,
    ),
    mobileone_s1=ByoModelCfg(
        blocks=_mobileone_bcfg(wf=(1.5, 1.5, 2.0, 2.5)),
        stem_type='one',
        stem_chs=64,
    ),
    mobileone_s2=ByoModelCfg(
        blocks=_mobileone_bcfg(wf=(1.5, 2.0, 2.5, 4.0)),
        stem_type='one',
        stem_chs=64,
    ),
    mobileone_s3=ByoModelCfg(
        blocks=_mobileone_bcfg(wf=(2.0, 2.5, 3.0, 4.0)),
        stem_type='one',
        stem_chs=64,
    ),
    mobileone_s4=ByoModelCfg(
        blocks=_mobileone_bcfg(wf=(3.0, 3.5, 3.5, 4.0), se_blocks=(0, 0, 5, 1)),
        stem_type='one',
        stem_chs=64,
    ),

    # resnet*_clip: bottleneck ResNets w/ a 3-entry stem_chs, 'avg2' stem pool, 'avg' downsample + aa layer,
    # and an 'attn_abs' head; the x4 / x16 / x64 variants also set width_factor
    resnet50_clip=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
            ByoBlockCfg(type='bottle', d=4, c=512, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=6, c=1024, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=2048, s=2, br=0.25),
        ),
        stem_chs=(32, 32, 64),
        stem_type='',
        stem_pool='avg2',
        downsample='avg',
        aa_layer='avg',
        head_type='attn_abs',
    ),
    resnet101_clip=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
            ByoBlockCfg(type='bottle', d=4, c=512, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=23, c=1024, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=2048, s=2, br=0.25),
        ),
        stem_chs=(32, 32, 64),
        stem_type='',
        stem_pool='avg2',
        downsample='avg',
        aa_layer='avg',
        head_type='attn_abs',
    ),
    resnet50x4_clip=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=4, c=256, s=1, br=0.25),
            ByoBlockCfg(type='bottle', d=6, c=512, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=10, c=1024, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=6, c=2048, s=2, br=0.25),
        ),
        width_factor=1.25,
        stem_chs=(32, 32, 64),
        stem_type='',
        stem_pool='avg2',
        downsample='avg',
        aa_layer='avg',
        head_type='attn_abs',
    ),
    resnet50x16_clip=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=6, c=256, s=1, br=0.25),
            ByoBlockCfg(type='bottle', d=8, c=512, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=18, c=1024, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=8, c=2048, s=2, br=0.25),
        ),
        width_factor=1.5,
        stem_chs=(32, 32, 64),
        stem_type='',
        stem_pool='avg2',
        downsample='avg',
        aa_layer='avg',
        head_type='attn_abs',
    ),
    resnet50x64_clip=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
            ByoBlockCfg(type='bottle', d=15, c=512, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=36, c=1024, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=10, c=2048, s=2, br=0.25),
        ),
        width_factor=2.0,
        stem_chs=(32, 32, 64),
        stem_type='',
        stem_pool='avg2',
        downsample='avg',
        aa_layer='avg',
        head_type='attn_abs',
    ),

    # same trunk settings as resnet50_clip, but with an 'mlp' head (hidden size 1024) instead of attention pooling
    resnet50_mlp=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
            ByoBlockCfg(type='bottle', d=4, c=512, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=6, c=1024, s=2, br=0.25),
            ByoBlockCfg(type='bottle', d=3, c=2048, s=2, br=0.25),
        ),
        stem_chs=(32, 32, 64),
        stem_type='',
        stem_pool='avg2',
        downsample='avg',
        aa_layer='avg',
        head_hidden_size=1024,
        head_type='mlp',
    ),

    # small config with one stage each of the edge / dark / basic / bottle block types, SE attn, relu act
    test_byobnet=ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='edge', d=1, c=32, s=2, gs=0, br=0.5),
            ByoBlockCfg(type='dark', d=1, c=64, s=2, gs=0, br=0.5),
            ByoBlockCfg(type='basic', d=1, c=128, s=2, gs=32, br=0.25),
            ByoBlockCfg(type='bottle', d=1, c=256, s=2, gs=64, br=0.25),
        ),
        stem_chs=24,
        downsample='avg',
        stem_pool='',
        act_layer='relu',
        attn_layer='se',
        attn_kwargs=dict(rd_ratio=0.25),
    ),
)
# register a '<name>_gap' twin of every CLIP ResNet config whose only difference is head_type='classifier'
for k in ('resnet50_clip', 'resnet101_clip', 'resnet50x4_clip', 'resnet50x16_clip', 'resnet50x64_clip'):
    model_cfgs[k + '_gap'] = replace(model_cfgs[k], head_type='classifier')
  2118. def _convert_openai_clip(
  2119. state_dict: Dict[str, torch.Tensor],
  2120. model: ByobNet,
  2121. prefix: str = 'visual.',
  2122. ) -> Dict[str, torch.Tensor]:
  2123. model_has_attn_pool = isinstance(model.head, (RotAttentionPool2d, AttentionPool2d))
  2124. import re
  2125. def _stage_sub(m):
  2126. stage_idx = int(m.group(1)) - 1
  2127. layer_idx, layer_type, layer_id = int(m.group(2)), m.group(3), int(m.group(4))
  2128. prefix_str = f'stages.{stage_idx}.{layer_idx}.'
  2129. id_map = {1: 'conv1_1x1.', 2: 'conv2_kxk.', 3: 'conv3_1x1.'}
  2130. suffix_str = id_map[layer_id] + layer_type
  2131. return prefix_str + suffix_str
  2132. def _down_sub(m):
  2133. stage_idx = int(m.group(1)) - 1
  2134. layer_idx, layer_id = int(m.group(2)), int(m.group(3))
  2135. return f'stages.{stage_idx}.{layer_idx}.shortcut.' + ('conv.conv' if layer_id == 0 else 'conv.bn')
  2136. out_dict = {}
  2137. for k, v in state_dict.items():
  2138. if not k.startswith(prefix):
  2139. continue
  2140. k = re.sub(rf'{prefix}conv([0-9])', r'stem.conv\1.conv', k)
  2141. k = re.sub(rf'{prefix}bn([0-9])', r'stem.conv\1.bn', k)
  2142. k = re.sub(rf'{prefix}layer([0-9])\.([0-9]+)\.([a-z]+)([0-9])', _stage_sub, k)
  2143. k = re.sub(rf'{prefix}layer([0-9])\.([0-9]+)\.downsample\.([0-9])', _down_sub, k)
  2144. if k.startswith(f'{prefix}attnpool'):
  2145. if not model_has_attn_pool:
  2146. continue
  2147. k = k.replace(prefix + 'attnpool', 'head') # 'attn_pool')
  2148. k = k.replace('positional_embedding', 'pos_embed')
  2149. k = k.replace('q_proj', 'q')
  2150. k = k.replace('k_proj', 'k')
  2151. k = k.replace('v_proj', 'v')
  2152. k = k.replace('c_proj', 'proj')
  2153. out_dict[k] = v
  2154. return out_dict
  2155. def checkpoint_filter_fn(
  2156. state_dict: Dict[str, torch.Tensor],
  2157. model: ByobNet
  2158. ):
  2159. if 'visual.conv1.weight' in state_dict:
  2160. state_dict = _convert_openai_clip(state_dict, model)
  2161. return state_dict
  2162. def _create_byobnet(variant: str, pretrained: bool = False, **kwargs) -> ByobNet:
  2163. """Create a ByobNet model.
  2164. Args:
  2165. variant: Model variant name.
  2166. pretrained: Load pretrained weights.
  2167. **kwargs: Additional model arguments.
  2168. Returns:
  2169. ByobNet model instance.
  2170. """
  2171. return build_model_with_cfg(
  2172. ByobNet, variant, pretrained,
  2173. model_cfg=model_cfgs[variant],
  2174. pretrained_filter_fn=checkpoint_filter_fn,
  2175. feature_cfg=dict(flatten_sequential=True),
  2176. **kwargs,
  2177. )
  2178. def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
  2179. """Create default configuration dictionary.
  2180. Args:
  2181. url: Model weight URL.
  2182. **kwargs: Additional configuration options.
  2183. Returns:
  2184. Configuration dictionary.
  2185. """
  2186. return {
  2187. 'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
  2188. 'crop_pct': 0.875, 'interpolation': 'bilinear',
  2189. 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
  2190. 'first_conv': 'stem.conv', 'classifier': 'head.fc',
  2191. 'license': 'apache-2.0',
  2192. **kwargs
  2193. }
  2194. def _cfgr(url: str = '', **kwargs) -> Dict[str, Any]:
  2195. """Create RepVGG configuration dictionary.
  2196. Args:
  2197. url: Model weight URL.
  2198. **kwargs: Additional configuration options.
  2199. Returns:
  2200. Configuration dictionary.
  2201. """
  2202. return {
  2203. 'url': url, 'num_classes': 1000, 'input_size': (3, 256, 256), 'pool_size': (8, 8),
  2204. 'crop_pct': 0.9, 'interpolation': 'bicubic',
  2205. 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
  2206. 'first_conv': 'stem.conv1.conv', 'classifier': 'head.fc',
  2207. 'license': 'apache-2.0',
  2208. **kwargs
  2209. }
# Pretrained configuration table. Keys have the form '<model name>.<tag>'; each value is
# built by `_cfg` (224x224 / bilinear defaults) or `_cfgr` (256x256 / bicubic defaults)
# with per-entry overrides. The mapping is passed through `generate_default_cfgs`.
default_cfgs = generate_default_cfgs({
    # GPU-Efficient (ResNet) weights
    'gernet_s.idstcv_in1k': _cfg(hf_hub_id='timm/'),
    'gernet_m.idstcv_in1k': _cfg(hf_hub_id='timm/'),
    'gernet_l.idstcv_in1k': _cfg(hf_hub_id='timm/', input_size=(3, 256, 256), pool_size=(8, 8)),

    # RepVGG weights
    # (first_conv is a tuple here: these stems list two first-conv layers)
    'repvgg_a0.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_a1.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_a2.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b0.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b1.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b1g4.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b2.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b2g4.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b3.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b3g4.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_d2se.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit',
        input_size=(3, 320, 320), pool_size=(10, 10), crop_pct=1.0,
    ),

    # experimental ResNet configs
    # NOTE(review): resnet51q uses `_cfg` with explicit 256x256 overrides while resnet61q
    # uses `_cfgr`; the two differ in crop_pct / interpolation / first_conv defaults -- confirm intended.
    'resnet51q.ra2_in1k': _cfg(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet51q_ra2-d47dcc76.pth',
        first_conv='stem.conv1', input_size=(3, 256, 256), pool_size=(8, 8),
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'resnet61q.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet61q_ra2-6afc536c.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    # ResNeXt-26 models with different attention in Bottleneck blocks
    'resnext26ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/resnext26ts_256_ra2-8bbd9106.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'seresnext26ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/seresnext26ts_256-6f0d74a3.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'gcresnext26ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/gcresnext26ts_256-e414378b.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'eca_resnext26ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/eca_resnext26ts_256-5a1d030f.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    # bat_resnext26ts is the only entry in this group that sets min_input_size instead of test-time sizes
    'bat_resnext26ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/bat_resnext26ts_256-fa6fd595.pth',
        min_input_size=(3, 256, 256)),

    # ResNet-32 / 33 models with different attention in Bottleneck blocks
    'resnet32ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/resnet32ts_256-aacf5250.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'resnet33ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/resnet33ts_256-e91b09a4.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'gcresnet33ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/gcresnet33ts_256-0e0cd345.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'seresnet33ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/seresnet33ts_256-f8ad44d9.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'eca_resnet33ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/eca_resnet33ts_256-8f98face.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    'gcresnet50t.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/gcresnet50t_256-96374d1c.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    'gcresnext50ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/gcresnext50ts_256-3e0f515e.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    # custom `timm` specific RegNetZ inspired models w/ different sizing from paper
    # (all entries in this group use mean/std of 0.5 rather than the ImageNet defaults)
    'regnetz_b16.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_b_raa-677d9606.pth',
        first_conv='stem.conv', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        input_size=(3, 224, 224), pool_size=(7, 7), crop_pct=0.94, test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'regnetz_c16.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_c_rab2_256-a54bf36a.pth',
        first_conv='stem.conv', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        crop_pct=0.94, test_input_size=(3, 320, 320), test_crop_pct=1.0),
    'regnetz_d32.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_d_rab_256-b8073a89.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=0.95, test_input_size=(3, 320, 320)),
    'regnetz_d8.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_d8_bh-afc03c55.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=0.94, test_input_size=(3, 320, 320), test_crop_pct=1.0),
    'regnetz_e8.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_e8_bh-aace8e6e.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=0.94, test_input_size=(3, 320, 320), test_crop_pct=1.0),

    # 'untrained' tag: no hf_hub_id or url is given for this entry
    'regnetz_b16_evos.untrained': _cfgr(
        first_conv='stem.conv', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        input_size=(3, 224, 224), pool_size=(7, 7), crop_pct=0.95, test_input_size=(3, 288, 288)),
    'regnetz_c16_evos.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/regnetz_c16_evos_ch-d8311942.pth',
        first_conv='stem.conv', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        crop_pct=0.95, test_input_size=(3, 320, 320)),
    'regnetz_d8_evos.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/regnetz_d8_evos_ch-2bc12646.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=0.95, test_input_size=(3, 320, 320), test_crop_pct=1.0),

    # MobileOne weights (non-default license string; two first-conv layers in the stem)
    'mobileone_s0.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.875,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),
    'mobileone_s1.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.9,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),
    'mobileone_s2.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.9,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),
    'mobileone_s3.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.9,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),
    'mobileone_s4.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.9,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),

    # original attention pool head variants
    # (fixed_input_size=True; num_classes is the output width of 'head.proj')
    'resnet50_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=1024, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),
    'resnet101_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=512, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50x4_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=640, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 288, 288), pool_size=(9, 9),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50x16_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=768, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 384, 384), pool_size=(12, 12),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50x64_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=1024, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 448, 448), pool_size=(14, 14),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50_clip.cc12m': _cfgr(
        hf_hub_id='timm/',
        num_classes=1024, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50_clip.yfcc15m': _cfgr(
        hf_hub_id='timm/',
        num_classes=1024, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),
    'resnet101_clip.yfcc15m': _cfgr(
        hf_hub_id='timm/',
        num_classes=512, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),

    # avg-pool w/ optional standard classifier head variants
    # (num_classes=0 and the default 'head.fc' classifier name from `_cfgr`)
    'resnet50_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),
    'resnet101_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),
    'resnet50x4_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 288, 288), pool_size=(9, 9),
        license='mit',
    ),
    'resnet50x16_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 384, 384), pool_size=(12, 12),
        license='mit',
    ),
    'resnet50x64_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 448, 448), pool_size=(14, 14),
        license='mit',
    ),
    'resnet50_clip_gap.cc12m': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),
    'resnet50_clip_gap.yfcc15m': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),
    'resnet101_clip_gap.yfcc15m': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),

    # 'untrained' tag: no hf_hub_id or url is given for this entry
    'resnet50_mlp.untrained': _cfgr(
        input_size=(3, 256, 256), pool_size=(8, 8),
    ),

    # small test model config, 160x160 input
    'test_byobnet.r160_in1k': _cfgr(
        hf_hub_id='timm/',
        first_conv='stem.conv',
        input_size=(3, 160, 160), crop_pct=0.95, pool_size=(5, 5),
    ),
})
  2491. @register_model
  2492. def gernet_l(pretrained=False, **kwargs) -> ByobNet:
  2493. """ GEResNet-Large (GENet-Large from official impl)
  2494. `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
  2495. """
  2496. return _create_byobnet('gernet_l', pretrained=pretrained, **kwargs)
  2497. @register_model
  2498. def gernet_m(pretrained=False, **kwargs) -> ByobNet:
  2499. """ GEResNet-Medium (GENet-Normal from official impl)
  2500. `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
  2501. """
  2502. return _create_byobnet('gernet_m', pretrained=pretrained, **kwargs)
  2503. @register_model
  2504. def gernet_s(pretrained=False, **kwargs) -> ByobNet:
  2505. """ EResNet-Small (GENet-Small from official impl)
  2506. `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
  2507. """
  2508. return _create_byobnet('gernet_s', pretrained=pretrained, **kwargs)
  2509. @register_model
  2510. def repvgg_a0(pretrained=False, **kwargs) -> ByobNet:
  2511. """ RepVGG-A0
  2512. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2513. """
  2514. return _create_byobnet('repvgg_a0', pretrained=pretrained, **kwargs)
  2515. @register_model
  2516. def repvgg_a1(pretrained=False, **kwargs) -> ByobNet:
  2517. """ RepVGG-A1
  2518. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2519. """
  2520. return _create_byobnet('repvgg_a1', pretrained=pretrained, **kwargs)
  2521. @register_model
  2522. def repvgg_a2(pretrained=False, **kwargs) -> ByobNet:
  2523. """ RepVGG-A2
  2524. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2525. """
  2526. return _create_byobnet('repvgg_a2', pretrained=pretrained, **kwargs)
  2527. @register_model
  2528. def repvgg_b0(pretrained=False, **kwargs) -> ByobNet:
  2529. """ RepVGG-B0
  2530. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2531. """
  2532. return _create_byobnet('repvgg_b0', pretrained=pretrained, **kwargs)
  2533. @register_model
  2534. def repvgg_b1(pretrained=False, **kwargs) -> ByobNet:
  2535. """ RepVGG-B1
  2536. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2537. """
  2538. return _create_byobnet('repvgg_b1', pretrained=pretrained, **kwargs)
  2539. @register_model
  2540. def repvgg_b1g4(pretrained=False, **kwargs) -> ByobNet:
  2541. """ RepVGG-B1g4
  2542. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2543. """
  2544. return _create_byobnet('repvgg_b1g4', pretrained=pretrained, **kwargs)
  2545. @register_model
  2546. def repvgg_b2(pretrained=False, **kwargs) -> ByobNet:
  2547. """ RepVGG-B2
  2548. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2549. """
  2550. return _create_byobnet('repvgg_b2', pretrained=pretrained, **kwargs)
  2551. @register_model
  2552. def repvgg_b2g4(pretrained=False, **kwargs) -> ByobNet:
  2553. """ RepVGG-B2g4
  2554. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2555. """
  2556. return _create_byobnet('repvgg_b2g4', pretrained=pretrained, **kwargs)
  2557. @register_model
  2558. def repvgg_b3(pretrained=False, **kwargs) -> ByobNet:
  2559. """ RepVGG-B3
  2560. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2561. """
  2562. return _create_byobnet('repvgg_b3', pretrained=pretrained, **kwargs)
  2563. @register_model
  2564. def repvgg_b3g4(pretrained=False, **kwargs) -> ByobNet:
  2565. """ RepVGG-B3g4
  2566. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2567. """
  2568. return _create_byobnet('repvgg_b3g4', pretrained=pretrained, **kwargs)
  2569. @register_model
  2570. def repvgg_d2se(pretrained=False, **kwargs) -> ByobNet:
  2571. """ RepVGG-D2se
  2572. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2573. """
  2574. return _create_byobnet('repvgg_d2se', pretrained=pretrained, **kwargs)
  2575. @register_model
  2576. def resnet51q(pretrained=False, **kwargs) -> ByobNet:
  2577. """
  2578. """
  2579. return _create_byobnet('resnet51q', pretrained=pretrained, **kwargs)
  2580. @register_model
  2581. def resnet61q(pretrained=False, **kwargs) -> ByobNet:
  2582. """
  2583. """
  2584. return _create_byobnet('resnet61q', pretrained=pretrained, **kwargs)
  2585. @register_model
  2586. def resnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2587. """
  2588. """
  2589. return _create_byobnet('resnext26ts', pretrained=pretrained, **kwargs)
  2590. @register_model
  2591. def gcresnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2592. """
  2593. """
  2594. return _create_byobnet('gcresnext26ts', pretrained=pretrained, **kwargs)
  2595. @register_model
  2596. def seresnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2597. """
  2598. """
  2599. return _create_byobnet('seresnext26ts', pretrained=pretrained, **kwargs)
  2600. @register_model
  2601. def eca_resnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2602. """
  2603. """
  2604. return _create_byobnet('eca_resnext26ts', pretrained=pretrained, **kwargs)
  2605. @register_model
  2606. def bat_resnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2607. """
  2608. """
  2609. return _create_byobnet('bat_resnext26ts', pretrained=pretrained, **kwargs)
  2610. @register_model
  2611. def resnet32ts(pretrained=False, **kwargs) -> ByobNet:
  2612. """
  2613. """
  2614. return _create_byobnet('resnet32ts', pretrained=pretrained, **kwargs)
  2615. @register_model
  2616. def resnet33ts(pretrained=False, **kwargs) -> ByobNet:
  2617. """
  2618. """
  2619. return _create_byobnet('resnet33ts', pretrained=pretrained, **kwargs)
  2620. @register_model
  2621. def gcresnet33ts(pretrained=False, **kwargs) -> ByobNet:
  2622. """
  2623. """
  2624. return _create_byobnet('gcresnet33ts', pretrained=pretrained, **kwargs)
  2625. @register_model
  2626. def seresnet33ts(pretrained=False, **kwargs) -> ByobNet:
  2627. """
  2628. """
  2629. return _create_byobnet('seresnet33ts', pretrained=pretrained, **kwargs)
  2630. @register_model
  2631. def eca_resnet33ts(pretrained=False, **kwargs) -> ByobNet:
  2632. """
  2633. """
  2634. return _create_byobnet('eca_resnet33ts', pretrained=pretrained, **kwargs)
  2635. @register_model
  2636. def gcresnet50t(pretrained=False, **kwargs) -> ByobNet:
  2637. """
  2638. """
  2639. return _create_byobnet('gcresnet50t', pretrained=pretrained, **kwargs)
  2640. @register_model
  2641. def gcresnext50ts(pretrained=False, **kwargs) -> ByobNet:
  2642. """
  2643. """
  2644. return _create_byobnet('gcresnext50ts', pretrained=pretrained, **kwargs)
  2645. @register_model
  2646. def regnetz_b16(pretrained=False, **kwargs) -> ByobNet:
  2647. """
  2648. """
  2649. return _create_byobnet('regnetz_b16', pretrained=pretrained, **kwargs)
  2650. @register_model
  2651. def regnetz_c16(pretrained=False, **kwargs) -> ByobNet:
  2652. """
  2653. """
  2654. return _create_byobnet('regnetz_c16', pretrained=pretrained, **kwargs)
  2655. @register_model
  2656. def regnetz_d32(pretrained=False, **kwargs) -> ByobNet:
  2657. """
  2658. """
  2659. return _create_byobnet('regnetz_d32', pretrained=pretrained, **kwargs)
  2660. @register_model
  2661. def regnetz_d8(pretrained=False, **kwargs) -> ByobNet:
  2662. """
  2663. """
  2664. return _create_byobnet('regnetz_d8', pretrained=pretrained, **kwargs)
  2665. @register_model
  2666. def regnetz_e8(pretrained=False, **kwargs) -> ByobNet:
  2667. """
  2668. """
  2669. return _create_byobnet('regnetz_e8', pretrained=pretrained, **kwargs)
  2670. @register_model
  2671. def regnetz_b16_evos(pretrained=False, **kwargs) -> ByobNet:
  2672. """
  2673. """
  2674. return _create_byobnet('regnetz_b16_evos', pretrained=pretrained, **kwargs)
  2675. @register_model
  2676. def regnetz_c16_evos(pretrained=False, **kwargs) -> ByobNet:
  2677. """
  2678. """
  2679. return _create_byobnet('regnetz_c16_evos', pretrained=pretrained, **kwargs)
  2680. @register_model
  2681. def regnetz_d8_evos(pretrained=False, **kwargs) -> ByobNet:
  2682. """
  2683. """
  2684. return _create_byobnet('regnetz_d8_evos', pretrained=pretrained, **kwargs)
  2685. @register_model
  2686. def mobileone_s0(pretrained=False, **kwargs) -> ByobNet:
  2687. """
  2688. """
  2689. return _create_byobnet('mobileone_s0', pretrained=pretrained, **kwargs)
  2690. @register_model
  2691. def mobileone_s1(pretrained=False, **kwargs) -> ByobNet:
  2692. """
  2693. """
  2694. return _create_byobnet('mobileone_s1', pretrained=pretrained, **kwargs)
  2695. @register_model
  2696. def mobileone_s2(pretrained=False, **kwargs) -> ByobNet:
  2697. """
  2698. """
  2699. return _create_byobnet('mobileone_s2', pretrained=pretrained, **kwargs)
  2700. @register_model
  2701. def mobileone_s3(pretrained=False, **kwargs) -> ByobNet:
  2702. """
  2703. """
  2704. return _create_byobnet('mobileone_s3', pretrained=pretrained, **kwargs)
  2705. @register_model
  2706. def mobileone_s4(pretrained=False, **kwargs) -> ByobNet:
  2707. """
  2708. """
  2709. return _create_byobnet('mobileone_s4', pretrained=pretrained, **kwargs)
  2710. @register_model
  2711. def resnet50_clip(pretrained=False, **kwargs) -> ByobNet:
  2712. """ OpenAI Modified ResNet-50 CLIP image tower
  2713. """
  2714. return _create_byobnet('resnet50_clip', pretrained=pretrained, **kwargs)
  2715. @register_model
  2716. def resnet101_clip(pretrained=False, **kwargs) -> ByobNet:
  2717. """ OpenAI Modified ResNet-101 CLIP image tower
  2718. """
  2719. return _create_byobnet('resnet101_clip', pretrained=pretrained, **kwargs)
  2720. @register_model
  2721. def resnet50x4_clip(pretrained=False, **kwargs) -> ByobNet:
  2722. """ OpenAI Modified ResNet-50x4 CLIP image tower
  2723. """
  2724. return _create_byobnet('resnet50x4_clip', pretrained=pretrained, **kwargs)
  2725. @register_model
  2726. def resnet50x16_clip(pretrained=False, **kwargs) -> ByobNet:
  2727. """ OpenAI Modified ResNet-50x16 CLIP image tower
  2728. """
  2729. return _create_byobnet('resnet50x16_clip', pretrained=pretrained, **kwargs)
  2730. @register_model
  2731. def resnet50x64_clip(pretrained=False, **kwargs) -> ByobNet:
  2732. """ OpenAI Modified ResNet-50x64 CLIP image tower
  2733. """
  2734. return _create_byobnet('resnet50x64_clip', pretrained=pretrained, **kwargs)
  2735. @register_model
  2736. def resnet50_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2737. """ OpenAI Modified ResNet-50 CLIP image tower w/ avg pool (no attention pool)
  2738. """
  2739. return _create_byobnet('resnet50_clip_gap', pretrained=pretrained, **kwargs)
  2740. @register_model
  2741. def resnet101_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2742. """ OpenAI Modified ResNet-101 CLIP image tower w/ avg pool (no attention pool)
  2743. """
  2744. return _create_byobnet('resnet101_clip_gap', pretrained=pretrained, **kwargs)
  2745. @register_model
  2746. def resnet50x4_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2747. """ OpenAI Modified ResNet-50x4 CLIP image tower w/ avg pool (no attention pool)
  2748. """
  2749. return _create_byobnet('resnet50x4_clip_gap', pretrained=pretrained, **kwargs)
  2750. @register_model
  2751. def resnet50x16_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2752. """ OpenAI Modified ResNet-50x16 CLIP image tower w/ avg pool (no attention pool)
  2753. """
  2754. return _create_byobnet('resnet50x16_clip_gap', pretrained=pretrained, **kwargs)
  2755. @register_model
  2756. def resnet50x64_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2757. """ OpenAI Modified ResNet-50x64 CLIP image tower w/ avg pool (no attention pool)
  2758. """
  2759. return _create_byobnet('resnet50x64_clip_gap', pretrained=pretrained, **kwargs)
  2760. @register_model
  2761. def resnet50_mlp(pretrained=False, **kwargs) -> ByobNet:
  2762. """
  2763. """
  2764. return _create_byobnet('resnet50_mlp', pretrained=pretrained, **kwargs)
  2765. @register_model
  2766. def test_byobnet(pretrained=False, **kwargs) -> ByobNet:
  2767. """ Minimal test ResNet (BYOB based) model.
  2768. """
  2769. return _create_byobnet('test_byobnet', pretrained=pretrained, **kwargs)