shufflenetv2.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import paddle
  15. from paddle import nn
  16. from paddle.nn import AdaptiveAvgPool2D, Linear, MaxPool2D
  17. from paddle.utils.download import get_weights_path_from_url
  18. from ..ops import ConvNormActivation
  19. __all__ = []
  20. model_urls = {
  21. "shufflenet_v2_x0_25": (
  22. "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x0_25.pdparams",
  23. "1e509b4c140eeb096bb16e214796d03b",
  24. ),
  25. "shufflenet_v2_x0_33": (
  26. "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x0_33.pdparams",
  27. "3d7b3ab0eaa5c0927ff1026d31b729bd",
  28. ),
  29. "shufflenet_v2_x0_5": (
  30. "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x0_5.pdparams",
  31. "5e5cee182a7793c4e4c73949b1a71bd4",
  32. ),
  33. "shufflenet_v2_x1_0": (
  34. "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x1_0.pdparams",
  35. "122d42478b9e81eb49f8a9ede327b1a4",
  36. ),
  37. "shufflenet_v2_x1_5": (
  38. "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x1_5.pdparams",
  39. "faced5827380d73531d0ee027c67826d",
  40. ),
  41. "shufflenet_v2_x2_0": (
  42. "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x2_0.pdparams",
  43. "cd3dddcd8305e7bcd8ad14d1c69a5784",
  44. ),
  45. "shufflenet_v2_swish": (
  46. "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_swish.pdparams",
  47. "adde0aa3b023e5b0c94a68be1c394b84",
  48. ),
  49. }
  50. def create_activation_layer(act):
  51. if act == "swish":
  52. return nn.Swish
  53. elif act == "relu":
  54. return nn.ReLU
  55. elif act is None:
  56. return None
  57. else:
  58. raise RuntimeError(f"The activation function is not supported: {act}")
  59. def channel_shuffle(x, groups):
  60. batch_size, num_channels, height, width = x.shape[0:4]
  61. channels_per_group = num_channels // groups
  62. # reshape
  63. x = paddle.reshape(
  64. x, shape=[batch_size, groups, channels_per_group, height, width]
  65. )
  66. # transpose
  67. x = paddle.transpose(x, perm=[0, 2, 1, 3, 4])
  68. # flatten
  69. x = paddle.reshape(x, shape=[batch_size, num_channels, height, width])
  70. return x
  71. class InvertedResidual(nn.Layer):
  72. def __init__(
  73. self, in_channels, out_channels, stride, activation_layer=nn.ReLU
  74. ):
  75. super().__init__()
  76. self._conv_pw = ConvNormActivation(
  77. in_channels=in_channels // 2,
  78. out_channels=out_channels // 2,
  79. kernel_size=1,
  80. stride=1,
  81. padding=0,
  82. groups=1,
  83. activation_layer=activation_layer,
  84. )
  85. self._conv_dw = ConvNormActivation(
  86. in_channels=out_channels // 2,
  87. out_channels=out_channels // 2,
  88. kernel_size=3,
  89. stride=stride,
  90. padding=1,
  91. groups=out_channels // 2,
  92. activation_layer=None,
  93. )
  94. self._conv_linear = ConvNormActivation(
  95. in_channels=out_channels // 2,
  96. out_channels=out_channels // 2,
  97. kernel_size=1,
  98. stride=1,
  99. padding=0,
  100. groups=1,
  101. activation_layer=activation_layer,
  102. )
  103. def forward(self, inputs):
  104. x1, x2 = paddle.split(
  105. inputs,
  106. num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
  107. axis=1,
  108. )
  109. x2 = self._conv_pw(x2)
  110. x2 = self._conv_dw(x2)
  111. x2 = self._conv_linear(x2)
  112. out = paddle.concat([x1, x2], axis=1)
  113. return channel_shuffle(out, 2)
  114. class InvertedResidualDS(nn.Layer):
  115. def __init__(
  116. self, in_channels, out_channels, stride, activation_layer=nn.ReLU
  117. ):
  118. super().__init__()
  119. # branch1
  120. self._conv_dw_1 = ConvNormActivation(
  121. in_channels=in_channels,
  122. out_channels=in_channels,
  123. kernel_size=3,
  124. stride=stride,
  125. padding=1,
  126. groups=in_channels,
  127. activation_layer=None,
  128. )
  129. self._conv_linear_1 = ConvNormActivation(
  130. in_channels=in_channels,
  131. out_channels=out_channels // 2,
  132. kernel_size=1,
  133. stride=1,
  134. padding=0,
  135. groups=1,
  136. activation_layer=activation_layer,
  137. )
  138. # branch2
  139. self._conv_pw_2 = ConvNormActivation(
  140. in_channels=in_channels,
  141. out_channels=out_channels // 2,
  142. kernel_size=1,
  143. stride=1,
  144. padding=0,
  145. groups=1,
  146. activation_layer=activation_layer,
  147. )
  148. self._conv_dw_2 = ConvNormActivation(
  149. in_channels=out_channels // 2,
  150. out_channels=out_channels // 2,
  151. kernel_size=3,
  152. stride=stride,
  153. padding=1,
  154. groups=out_channels // 2,
  155. activation_layer=None,
  156. )
  157. self._conv_linear_2 = ConvNormActivation(
  158. in_channels=out_channels // 2,
  159. out_channels=out_channels // 2,
  160. kernel_size=1,
  161. stride=1,
  162. padding=0,
  163. groups=1,
  164. activation_layer=activation_layer,
  165. )
  166. def forward(self, inputs):
  167. x1 = self._conv_dw_1(inputs)
  168. x1 = self._conv_linear_1(x1)
  169. x2 = self._conv_pw_2(inputs)
  170. x2 = self._conv_dw_2(x2)
  171. x2 = self._conv_linear_2(x2)
  172. out = paddle.concat([x1, x2], axis=1)
  173. return channel_shuffle(out, 2)
  174. class ShuffleNetV2(nn.Layer):
  175. """ShuffleNetV2 model from
  176. `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
  177. Args:
  178. scale (float, optional): Scale of output channels. Default: True.
  179. act (str, optional): Activation function of neural network. Default: "relu".
  180. num_classes (int, optional): Output dim of last fc layer. If num_classes <= 0, last fc layer
  181. will not be defined. Default: 1000.
  182. with_pool (bool, optional): Use pool before the last fc layer or not. Default: True.
  183. Returns:
  184. :ref:`api_paddle_nn_Layer`. An instance of ShuffleNetV2 model.
  185. Examples:
  186. .. code-block:: python
  187. >>> import paddle
  188. >>> from paddle.vision.models import ShuffleNetV2
  189. >>> shufflenet_v2_swish = ShuffleNetV2(scale=1.0, act="swish")
  190. >>> x = paddle.rand([1, 3, 224, 224])
  191. >>> out = shufflenet_v2_swish(x)
  192. >>> print(out.shape)
  193. [1, 1000]
  194. """
  195. def __init__(self, scale=1.0, act="relu", num_classes=1000, with_pool=True):
  196. super().__init__()
  197. self.scale = scale
  198. self.num_classes = num_classes
  199. self.with_pool = with_pool
  200. stage_repeats = [4, 8, 4]
  201. activation_layer = create_activation_layer(act)
  202. if scale == 0.25:
  203. stage_out_channels = [-1, 24, 24, 48, 96, 512]
  204. elif scale == 0.33:
  205. stage_out_channels = [-1, 24, 32, 64, 128, 512]
  206. elif scale == 0.5:
  207. stage_out_channels = [-1, 24, 48, 96, 192, 1024]
  208. elif scale == 1.0:
  209. stage_out_channels = [-1, 24, 116, 232, 464, 1024]
  210. elif scale == 1.5:
  211. stage_out_channels = [-1, 24, 176, 352, 704, 1024]
  212. elif scale == 2.0:
  213. stage_out_channels = [-1, 24, 224, 488, 976, 2048]
  214. else:
  215. raise NotImplementedError(
  216. "This scale size:[" + str(scale) + "] is not implemented!"
  217. )
  218. # 1. conv1
  219. self._conv1 = ConvNormActivation(
  220. in_channels=3,
  221. out_channels=stage_out_channels[1],
  222. kernel_size=3,
  223. stride=2,
  224. padding=1,
  225. activation_layer=activation_layer,
  226. )
  227. self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
  228. # 2. bottleneck sequences
  229. self._block_list = []
  230. for stage_id, num_repeat in enumerate(stage_repeats):
  231. for i in range(num_repeat):
  232. if i == 0:
  233. block = self.add_sublayer(
  234. sublayer=InvertedResidualDS(
  235. in_channels=stage_out_channels[stage_id + 1],
  236. out_channels=stage_out_channels[stage_id + 2],
  237. stride=2,
  238. activation_layer=activation_layer,
  239. ),
  240. name=str(stage_id + 2) + "_" + str(i + 1),
  241. )
  242. else:
  243. block = self.add_sublayer(
  244. sublayer=InvertedResidual(
  245. in_channels=stage_out_channels[stage_id + 2],
  246. out_channels=stage_out_channels[stage_id + 2],
  247. stride=1,
  248. activation_layer=activation_layer,
  249. ),
  250. name=str(stage_id + 2) + "_" + str(i + 1),
  251. )
  252. self._block_list.append(block)
  253. # 3. last_conv
  254. self._last_conv = ConvNormActivation(
  255. in_channels=stage_out_channels[-2],
  256. out_channels=stage_out_channels[-1],
  257. kernel_size=1,
  258. stride=1,
  259. padding=0,
  260. activation_layer=activation_layer,
  261. )
  262. # 4. pool
  263. if with_pool:
  264. self._pool2d_avg = AdaptiveAvgPool2D(1)
  265. # 5. fc
  266. if num_classes > 0:
  267. self._out_c = stage_out_channels[-1]
  268. self._fc = Linear(stage_out_channels[-1], num_classes)
  269. def forward(self, inputs):
  270. x = self._conv1(inputs)
  271. x = self._max_pool(x)
  272. for inv in self._block_list:
  273. x = inv(x)
  274. x = self._last_conv(x)
  275. if self.with_pool:
  276. x = self._pool2d_avg(x)
  277. if self.num_classes > 0:
  278. x = paddle.flatten(x, start_axis=1, stop_axis=-1)
  279. x = self._fc(x)
  280. return x
  281. def _shufflenet_v2(arch, pretrained=False, **kwargs):
  282. model = ShuffleNetV2(**kwargs)
  283. if pretrained:
  284. assert (
  285. arch in model_urls
  286. ), f"{arch} model do not have a pretrained model now, you should set pretrained=False"
  287. weight_path = get_weights_path_from_url(
  288. model_urls[arch][0], model_urls[arch][1]
  289. )
  290. param = paddle.load(weight_path)
  291. model.set_dict(param)
  292. return model
  293. def shufflenet_v2_x0_25(pretrained=False, **kwargs):
  294. """ShuffleNetV2 with 0.25x output channels, as described in
  295. `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
  296. Args:
  297. pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
  298. on ImageNet. Default: False.
  299. **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`ShuffleNetV2 <api_paddle_vision_models_ShuffleNetV2>`.
  300. Returns:
  301. :ref:`api_paddle_nn_Layer`. An instance of ShuffleNetV2 with 0.25x output channels.
  302. Examples:
  303. .. code-block:: python
  304. >>> import paddle
  305. >>> from paddle.vision.models import shufflenet_v2_x0_25
  306. >>> # build model
  307. >>> model = shufflenet_v2_x0_25()
  308. >>> # build model and load imagenet pretrained weight
  309. >>> # model = shufflenet_v2_x0_25(pretrained=True)
  310. >>> x = paddle.rand([1, 3, 224, 224])
  311. >>> out = model(x)
  312. >>> print(out.shape)
  313. [1, 1000]
  314. """
  315. return _shufflenet_v2(
  316. "shufflenet_v2_x0_25", scale=0.25, pretrained=pretrained, **kwargs
  317. )
  318. def shufflenet_v2_x0_33(pretrained=False, **kwargs):
  319. """ShuffleNetV2 with 0.33x output channels, as described in
  320. `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
  321. Args:
  322. pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
  323. on ImageNet. Default: False.
  324. **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`ShuffleNetV2 <api_paddle_vision_models_ShuffleNetV2>`.
  325. Returns:
  326. :ref:`api_paddle_nn_Layer`. An instance of ShuffleNetV2 with 0.33x output channels.
  327. Examples:
  328. .. code-block:: python
  329. >>> import paddle
  330. >>> from paddle.vision.models import shufflenet_v2_x0_33
  331. >>> # build model
  332. >>> model = shufflenet_v2_x0_33()
  333. >>> # build model and load imagenet pretrained weight
  334. >>> # model = shufflenet_v2_x0_33(pretrained=True)
  335. >>> x = paddle.rand([1, 3, 224, 224])
  336. >>> out = model(x)
  337. >>> print(out.shape)
  338. [1, 1000]
  339. """
  340. return _shufflenet_v2(
  341. "shufflenet_v2_x0_33", scale=0.33, pretrained=pretrained, **kwargs
  342. )
  343. def shufflenet_v2_x0_5(pretrained=False, **kwargs):
  344. """ShuffleNetV2 with 0.5x output channels, as described in
  345. `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
  346. Args:
  347. pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
  348. on ImageNet. Default: False.
  349. **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`ShuffleNetV2 <api_paddle_vision_models_ShuffleNetV2>`.
  350. Returns:
  351. :ref:`api_paddle_nn_Layer`. An instance of ShuffleNetV2 with 0.5x output channels.
  352. Examples:
  353. .. code-block:: python
  354. >>> import paddle
  355. >>> from paddle.vision.models import shufflenet_v2_x0_5
  356. >>> # build model
  357. >>> model = shufflenet_v2_x0_5()
  358. >>> # build model and load imagenet pretrained weight
  359. >>> # model = shufflenet_v2_x0_5(pretrained=True)
  360. >>> x = paddle.rand([1, 3, 224, 224])
  361. >>> out = model(x)
  362. >>> print(out.shape)
  363. [1, 1000]
  364. """
  365. return _shufflenet_v2(
  366. "shufflenet_v2_x0_5", scale=0.5, pretrained=pretrained, **kwargs
  367. )
  368. def shufflenet_v2_x1_0(pretrained=False, **kwargs):
  369. """ShuffleNetV2 with 1.0x output channels, as described in
  370. `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
  371. Args:
  372. pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
  373. on ImageNet. Default: False.
  374. **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`ShuffleNetV2 <api_paddle_vision_models_ShuffleNetV2>`.
  375. Returns:
  376. :ref:`api_paddle_nn_Layer`. An instance of ShuffleNetV2 with 1.0x output channels.
  377. Examples:
  378. .. code-block:: python
  379. >>> import paddle
  380. >>> from paddle.vision.models import shufflenet_v2_x1_0
  381. >>> # build model
  382. >>> model = shufflenet_v2_x1_0()
  383. >>> # build model and load imagenet pretrained weight
  384. >>> # model = shufflenet_v2_x1_0(pretrained=True)
  385. >>> x = paddle.rand([1, 3, 224, 224])
  386. >>> out = model(x)
  387. >>> print(out.shape)
  388. [1, 1000]
  389. """
  390. return _shufflenet_v2(
  391. "shufflenet_v2_x1_0", scale=1.0, pretrained=pretrained, **kwargs
  392. )
  393. def shufflenet_v2_x1_5(pretrained=False, **kwargs):
  394. """ShuffleNetV2 with 1.5x output channels, as described in
  395. `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
  396. Args:
  397. pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
  398. on ImageNet. Default: False.
  399. **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`ShuffleNetV2 <api_paddle_vision_models_ShuffleNetV2>`.
  400. Returns:
  401. :ref:`api_paddle_nn_Layer`. An instance of ShuffleNetV2 with 1.5x output channels.
  402. Examples:
  403. .. code-block:: python
  404. >>> import paddle
  405. >>> from paddle.vision.models import shufflenet_v2_x1_5
  406. >>> # build model
  407. >>> model = shufflenet_v2_x1_5()
  408. >>> # build model and load imagenet pretrained weight
  409. >>> # model = shufflenet_v2_x1_5(pretrained=True)
  410. >>> x = paddle.rand([1, 3, 224, 224])
  411. >>> out = model(x)
  412. >>> print(out.shape)
  413. [1, 1000]
  414. """
  415. return _shufflenet_v2(
  416. "shufflenet_v2_x1_5", scale=1.5, pretrained=pretrained, **kwargs
  417. )
  418. def shufflenet_v2_x2_0(pretrained=False, **kwargs):
  419. """ShuffleNetV2 with 2.0x output channels, as described in
  420. `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
  421. Args:
  422. pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
  423. on ImageNet. Default: False.
  424. **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`ShuffleNetV2 <api_paddle_vision_models_ShuffleNetV2>`.
  425. Returns:
  426. :ref:`api_paddle_nn_Layer`. An instance of ShuffleNetV2 with 2.0x output channels.
  427. Examples:
  428. .. code-block:: python
  429. >>> import paddle
  430. >>> from paddle.vision.models import shufflenet_v2_x2_0
  431. >>> # build model
  432. >>> model = shufflenet_v2_x2_0()
  433. >>> # build model and load imagenet pretrained weight
  434. >>> # model = shufflenet_v2_x2_0(pretrained=True)
  435. >>> x = paddle.rand([1, 3, 224, 224])
  436. >>> out = model(x)
  437. >>> print(out.shape)
  438. [1, 1000]
  439. """
  440. return _shufflenet_v2(
  441. "shufflenet_v2_x2_0", scale=2.0, pretrained=pretrained, **kwargs
  442. )
  443. def shufflenet_v2_swish(pretrained=False, **kwargs):
  444. """ShuffleNetV2 with swish activation function, as described in
  445. `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
  446. Args:
  447. pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
  448. on ImageNet. Default: False.
  449. **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`ShuffleNetV2 <api_paddle_vision_models_ShuffleNetV2>`.
  450. Returns:
  451. :ref:`api_paddle_nn_Layer`. An instance of ShuffleNetV2 with swish activation function.
  452. Examples:
  453. .. code-block:: python
  454. >>> import paddle
  455. >>> from paddle.vision.models import shufflenet_v2_swish
  456. >>> # build model
  457. >>> model = shufflenet_v2_swish()
  458. >>> # build model and load imagenet pretrained weight
  459. >>> # model = shufflenet_v2_swish(pretrained=True)
  460. >>> x = paddle.rand([1, 3, 224, 224])
  461. >>> out = model(x)
  462. >>> print(out.shape)
  463. [1, 1000]
  464. """
  465. return _shufflenet_v2(
  466. "shufflenet_v2_swish",
  467. scale=1.0,
  468. act="swish",
  469. pretrained=pretrained,
  470. **kwargs,
  471. )