inceptionv3.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import math
  15. import paddle
  16. from paddle import nn
  17. from paddle.base.param_attr import ParamAttr
  18. from paddle.nn import AdaptiveAvgPool2D, AvgPool2D, Dropout, Linear, MaxPool2D
  19. from paddle.nn.initializer import Uniform
  20. from paddle.utils.download import get_weights_path_from_url
  21. from ..ops import ConvNormActivation
# Empty export list: `from <this module> import *` exposes no names.
__all__ = []

# Pretrained checkpoint registry, keyed by architecture name.
# Each value is a 2-tuple: element 0 is the download URL and element 1 is
# passed as the second argument of `get_weights_path_from_url`
# (presumably the MD5 checksum of the file -- confirm against that helper).
model_urls = {
    "inception_v3": (
        "https://paddle-hapi.bj.bcebos.com/models/inception_v3.pdparams",
        "649a4547c3243e8b59c656f41fe330b8",
    )
}
  29. class InceptionStem(nn.Layer):
  30. def __init__(self):
  31. super().__init__()
  32. self.conv_1a_3x3 = ConvNormActivation(
  33. in_channels=3,
  34. out_channels=32,
  35. kernel_size=3,
  36. stride=2,
  37. padding=0,
  38. activation_layer=nn.ReLU,
  39. )
  40. self.conv_2a_3x3 = ConvNormActivation(
  41. in_channels=32,
  42. out_channels=32,
  43. kernel_size=3,
  44. stride=1,
  45. padding=0,
  46. activation_layer=nn.ReLU,
  47. )
  48. self.conv_2b_3x3 = ConvNormActivation(
  49. in_channels=32,
  50. out_channels=64,
  51. kernel_size=3,
  52. padding=1,
  53. activation_layer=nn.ReLU,
  54. )
  55. self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
  56. self.conv_3b_1x1 = ConvNormActivation(
  57. in_channels=64,
  58. out_channels=80,
  59. kernel_size=1,
  60. padding=0,
  61. activation_layer=nn.ReLU,
  62. )
  63. self.conv_4a_3x3 = ConvNormActivation(
  64. in_channels=80,
  65. out_channels=192,
  66. kernel_size=3,
  67. padding=0,
  68. activation_layer=nn.ReLU,
  69. )
  70. def forward(self, x):
  71. x = self.conv_1a_3x3(x)
  72. x = self.conv_2a_3x3(x)
  73. x = self.conv_2b_3x3(x)
  74. x = self.max_pool(x)
  75. x = self.conv_3b_1x1(x)
  76. x = self.conv_4a_3x3(x)
  77. x = self.max_pool(x)
  78. return x
  79. class InceptionA(nn.Layer):
  80. def __init__(self, num_channels, pool_features):
  81. super().__init__()
  82. self.branch1x1 = ConvNormActivation(
  83. in_channels=num_channels,
  84. out_channels=64,
  85. kernel_size=1,
  86. padding=0,
  87. activation_layer=nn.ReLU,
  88. )
  89. self.branch5x5_1 = ConvNormActivation(
  90. in_channels=num_channels,
  91. out_channels=48,
  92. kernel_size=1,
  93. padding=0,
  94. activation_layer=nn.ReLU,
  95. )
  96. self.branch5x5_2 = ConvNormActivation(
  97. in_channels=48,
  98. out_channels=64,
  99. kernel_size=5,
  100. padding=2,
  101. activation_layer=nn.ReLU,
  102. )
  103. self.branch3x3dbl_1 = ConvNormActivation(
  104. in_channels=num_channels,
  105. out_channels=64,
  106. kernel_size=1,
  107. padding=0,
  108. activation_layer=nn.ReLU,
  109. )
  110. self.branch3x3dbl_2 = ConvNormActivation(
  111. in_channels=64,
  112. out_channels=96,
  113. kernel_size=3,
  114. padding=1,
  115. activation_layer=nn.ReLU,
  116. )
  117. self.branch3x3dbl_3 = ConvNormActivation(
  118. in_channels=96,
  119. out_channels=96,
  120. kernel_size=3,
  121. padding=1,
  122. activation_layer=nn.ReLU,
  123. )
  124. self.branch_pool = AvgPool2D(
  125. kernel_size=3, stride=1, padding=1, exclusive=False
  126. )
  127. self.branch_pool_conv = ConvNormActivation(
  128. in_channels=num_channels,
  129. out_channels=pool_features,
  130. kernel_size=1,
  131. padding=0,
  132. activation_layer=nn.ReLU,
  133. )
  134. def forward(self, x):
  135. branch1x1 = self.branch1x1(x)
  136. branch5x5 = self.branch5x5_1(x)
  137. branch5x5 = self.branch5x5_2(branch5x5)
  138. branch3x3dbl = self.branch3x3dbl_1(x)
  139. branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
  140. branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
  141. branch_pool = self.branch_pool(x)
  142. branch_pool = self.branch_pool_conv(branch_pool)
  143. x = paddle.concat(
  144. [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1
  145. )
  146. return x
  147. class InceptionB(nn.Layer):
  148. def __init__(self, num_channels):
  149. super().__init__()
  150. self.branch3x3 = ConvNormActivation(
  151. in_channels=num_channels,
  152. out_channels=384,
  153. kernel_size=3,
  154. stride=2,
  155. padding=0,
  156. activation_layer=nn.ReLU,
  157. )
  158. self.branch3x3dbl_1 = ConvNormActivation(
  159. in_channels=num_channels,
  160. out_channels=64,
  161. kernel_size=1,
  162. padding=0,
  163. activation_layer=nn.ReLU,
  164. )
  165. self.branch3x3dbl_2 = ConvNormActivation(
  166. in_channels=64,
  167. out_channels=96,
  168. kernel_size=3,
  169. padding=1,
  170. activation_layer=nn.ReLU,
  171. )
  172. self.branch3x3dbl_3 = ConvNormActivation(
  173. in_channels=96,
  174. out_channels=96,
  175. kernel_size=3,
  176. stride=2,
  177. padding=0,
  178. activation_layer=nn.ReLU,
  179. )
  180. self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
  181. def forward(self, x):
  182. branch3x3 = self.branch3x3(x)
  183. branch3x3dbl = self.branch3x3dbl_1(x)
  184. branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
  185. branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
  186. branch_pool = self.branch_pool(x)
  187. x = paddle.concat([branch3x3, branch3x3dbl, branch_pool], axis=1)
  188. return x
  189. class InceptionC(nn.Layer):
  190. def __init__(self, num_channels, channels_7x7):
  191. super().__init__()
  192. self.branch1x1 = ConvNormActivation(
  193. in_channels=num_channels,
  194. out_channels=192,
  195. kernel_size=1,
  196. padding=0,
  197. activation_layer=nn.ReLU,
  198. )
  199. self.branch7x7_1 = ConvNormActivation(
  200. in_channels=num_channels,
  201. out_channels=channels_7x7,
  202. kernel_size=1,
  203. stride=1,
  204. padding=0,
  205. activation_layer=nn.ReLU,
  206. )
  207. self.branch7x7_2 = ConvNormActivation(
  208. in_channels=channels_7x7,
  209. out_channels=channels_7x7,
  210. kernel_size=(1, 7),
  211. stride=1,
  212. padding=(0, 3),
  213. activation_layer=nn.ReLU,
  214. )
  215. self.branch7x7_3 = ConvNormActivation(
  216. in_channels=channels_7x7,
  217. out_channels=192,
  218. kernel_size=(7, 1),
  219. stride=1,
  220. padding=(3, 0),
  221. activation_layer=nn.ReLU,
  222. )
  223. self.branch7x7dbl_1 = ConvNormActivation(
  224. in_channels=num_channels,
  225. out_channels=channels_7x7,
  226. kernel_size=1,
  227. padding=0,
  228. activation_layer=nn.ReLU,
  229. )
  230. self.branch7x7dbl_2 = ConvNormActivation(
  231. in_channels=channels_7x7,
  232. out_channels=channels_7x7,
  233. kernel_size=(7, 1),
  234. padding=(3, 0),
  235. activation_layer=nn.ReLU,
  236. )
  237. self.branch7x7dbl_3 = ConvNormActivation(
  238. in_channels=channels_7x7,
  239. out_channels=channels_7x7,
  240. kernel_size=(1, 7),
  241. padding=(0, 3),
  242. activation_layer=nn.ReLU,
  243. )
  244. self.branch7x7dbl_4 = ConvNormActivation(
  245. in_channels=channels_7x7,
  246. out_channels=channels_7x7,
  247. kernel_size=(7, 1),
  248. padding=(3, 0),
  249. activation_layer=nn.ReLU,
  250. )
  251. self.branch7x7dbl_5 = ConvNormActivation(
  252. in_channels=channels_7x7,
  253. out_channels=192,
  254. kernel_size=(1, 7),
  255. padding=(0, 3),
  256. activation_layer=nn.ReLU,
  257. )
  258. self.branch_pool = AvgPool2D(
  259. kernel_size=3, stride=1, padding=1, exclusive=False
  260. )
  261. self.branch_pool_conv = ConvNormActivation(
  262. in_channels=num_channels,
  263. out_channels=192,
  264. kernel_size=1,
  265. padding=0,
  266. activation_layer=nn.ReLU,
  267. )
  268. def forward(self, x):
  269. branch1x1 = self.branch1x1(x)
  270. branch7x7 = self.branch7x7_1(x)
  271. branch7x7 = self.branch7x7_2(branch7x7)
  272. branch7x7 = self.branch7x7_3(branch7x7)
  273. branch7x7dbl = self.branch7x7dbl_1(x)
  274. branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
  275. branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
  276. branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
  277. branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
  278. branch_pool = self.branch_pool(x)
  279. branch_pool = self.branch_pool_conv(branch_pool)
  280. x = paddle.concat(
  281. [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1
  282. )
  283. return x
  284. class InceptionD(nn.Layer):
  285. def __init__(self, num_channels):
  286. super().__init__()
  287. self.branch3x3_1 = ConvNormActivation(
  288. in_channels=num_channels,
  289. out_channels=192,
  290. kernel_size=1,
  291. padding=0,
  292. activation_layer=nn.ReLU,
  293. )
  294. self.branch3x3_2 = ConvNormActivation(
  295. in_channels=192,
  296. out_channels=320,
  297. kernel_size=3,
  298. stride=2,
  299. padding=0,
  300. activation_layer=nn.ReLU,
  301. )
  302. self.branch7x7x3_1 = ConvNormActivation(
  303. in_channels=num_channels,
  304. out_channels=192,
  305. kernel_size=1,
  306. padding=0,
  307. activation_layer=nn.ReLU,
  308. )
  309. self.branch7x7x3_2 = ConvNormActivation(
  310. in_channels=192,
  311. out_channels=192,
  312. kernel_size=(1, 7),
  313. padding=(0, 3),
  314. activation_layer=nn.ReLU,
  315. )
  316. self.branch7x7x3_3 = ConvNormActivation(
  317. in_channels=192,
  318. out_channels=192,
  319. kernel_size=(7, 1),
  320. padding=(3, 0),
  321. activation_layer=nn.ReLU,
  322. )
  323. self.branch7x7x3_4 = ConvNormActivation(
  324. in_channels=192,
  325. out_channels=192,
  326. kernel_size=3,
  327. stride=2,
  328. padding=0,
  329. activation_layer=nn.ReLU,
  330. )
  331. self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
  332. def forward(self, x):
  333. branch3x3 = self.branch3x3_1(x)
  334. branch3x3 = self.branch3x3_2(branch3x3)
  335. branch7x7x3 = self.branch7x7x3_1(x)
  336. branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
  337. branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
  338. branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
  339. branch_pool = self.branch_pool(x)
  340. x = paddle.concat([branch3x3, branch7x7x3, branch_pool], axis=1)
  341. return x
  342. class InceptionE(nn.Layer):
  343. def __init__(self, num_channels):
  344. super().__init__()
  345. self.branch1x1 = ConvNormActivation(
  346. in_channels=num_channels,
  347. out_channels=320,
  348. kernel_size=1,
  349. padding=0,
  350. activation_layer=nn.ReLU,
  351. )
  352. self.branch3x3_1 = ConvNormActivation(
  353. in_channels=num_channels,
  354. out_channels=384,
  355. kernel_size=1,
  356. padding=0,
  357. activation_layer=nn.ReLU,
  358. )
  359. self.branch3x3_2a = ConvNormActivation(
  360. in_channels=384,
  361. out_channels=384,
  362. kernel_size=(1, 3),
  363. padding=(0, 1),
  364. activation_layer=nn.ReLU,
  365. )
  366. self.branch3x3_2b = ConvNormActivation(
  367. in_channels=384,
  368. out_channels=384,
  369. kernel_size=(3, 1),
  370. padding=(1, 0),
  371. activation_layer=nn.ReLU,
  372. )
  373. self.branch3x3dbl_1 = ConvNormActivation(
  374. in_channels=num_channels,
  375. out_channels=448,
  376. kernel_size=1,
  377. padding=0,
  378. activation_layer=nn.ReLU,
  379. )
  380. self.branch3x3dbl_2 = ConvNormActivation(
  381. in_channels=448,
  382. out_channels=384,
  383. kernel_size=3,
  384. padding=1,
  385. activation_layer=nn.ReLU,
  386. )
  387. self.branch3x3dbl_3a = ConvNormActivation(
  388. in_channels=384,
  389. out_channels=384,
  390. kernel_size=(1, 3),
  391. padding=(0, 1),
  392. activation_layer=nn.ReLU,
  393. )
  394. self.branch3x3dbl_3b = ConvNormActivation(
  395. in_channels=384,
  396. out_channels=384,
  397. kernel_size=(3, 1),
  398. padding=(1, 0),
  399. activation_layer=nn.ReLU,
  400. )
  401. self.branch_pool = AvgPool2D(
  402. kernel_size=3, stride=1, padding=1, exclusive=False
  403. )
  404. self.branch_pool_conv = ConvNormActivation(
  405. in_channels=num_channels,
  406. out_channels=192,
  407. kernel_size=1,
  408. padding=0,
  409. activation_layer=nn.ReLU,
  410. )
  411. def forward(self, x):
  412. branch1x1 = self.branch1x1(x)
  413. branch3x3 = self.branch3x3_1(x)
  414. branch3x3 = [
  415. self.branch3x3_2a(branch3x3),
  416. self.branch3x3_2b(branch3x3),
  417. ]
  418. branch3x3 = paddle.concat(branch3x3, axis=1)
  419. branch3x3dbl = self.branch3x3dbl_1(x)
  420. branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
  421. branch3x3dbl = [
  422. self.branch3x3dbl_3a(branch3x3dbl),
  423. self.branch3x3dbl_3b(branch3x3dbl),
  424. ]
  425. branch3x3dbl = paddle.concat(branch3x3dbl, axis=1)
  426. branch_pool = self.branch_pool(x)
  427. branch_pool = self.branch_pool_conv(branch_pool)
  428. x = paddle.concat(
  429. [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1
  430. )
  431. return x
  432. class InceptionV3(nn.Layer):
  433. """Inception v3 model from
  434. `"Rethinking the Inception Architecture for Computer Vision" <https://arxiv.org/pdf/1512.00567.pdf>`_.
  435. Args:
  436. num_classes (int, optional): Output dim of last fc layer. If num_classes <= 0, last fc layer
  437. will not be defined. Default: 1000.
  438. with_pool (bool, optional): Use pool before the last fc layer or not. Default: True.
  439. Returns:
  440. :ref:`api_paddle_nn_Layer`. An instance of Inception v3 model.
  441. Examples:
  442. .. code-block:: python
  443. >>> import paddle
  444. >>> from paddle.vision.models import InceptionV3
  445. >>> inception_v3 = InceptionV3()
  446. >>> x = paddle.rand([1, 3, 299, 299])
  447. >>> out = inception_v3(x)
  448. >>> print(out.shape)
  449. [1, 1000]
  450. """
  451. def __init__(self, num_classes=1000, with_pool=True):
  452. super().__init__()
  453. self.num_classes = num_classes
  454. self.with_pool = with_pool
  455. self.layers_config = {
  456. "inception_a": [[192, 256, 288], [32, 64, 64]],
  457. "inception_b": [288],
  458. "inception_c": [[768, 768, 768, 768], [128, 160, 160, 192]],
  459. "inception_d": [768],
  460. "inception_e": [1280, 2048],
  461. }
  462. inception_a_list = self.layers_config["inception_a"]
  463. inception_c_list = self.layers_config["inception_c"]
  464. inception_b_list = self.layers_config["inception_b"]
  465. inception_d_list = self.layers_config["inception_d"]
  466. inception_e_list = self.layers_config["inception_e"]
  467. self.inception_stem = InceptionStem()
  468. self.inception_block_list = nn.LayerList()
  469. for i in range(len(inception_a_list[0])):
  470. inception_a = InceptionA(
  471. inception_a_list[0][i], inception_a_list[1][i]
  472. )
  473. self.inception_block_list.append(inception_a)
  474. for i in range(len(inception_b_list)):
  475. inception_b = InceptionB(inception_b_list[i])
  476. self.inception_block_list.append(inception_b)
  477. for i in range(len(inception_c_list[0])):
  478. inception_c = InceptionC(
  479. inception_c_list[0][i], inception_c_list[1][i]
  480. )
  481. self.inception_block_list.append(inception_c)
  482. for i in range(len(inception_d_list)):
  483. inception_d = InceptionD(inception_d_list[i])
  484. self.inception_block_list.append(inception_d)
  485. for i in range(len(inception_e_list)):
  486. inception_e = InceptionE(inception_e_list[i])
  487. self.inception_block_list.append(inception_e)
  488. if with_pool:
  489. self.avg_pool = AdaptiveAvgPool2D(1)
  490. if num_classes > 0:
  491. self.dropout = Dropout(p=0.2, mode="downscale_in_infer")
  492. stdv = 1.0 / math.sqrt(2048 * 1.0)
  493. self.fc = Linear(
  494. 2048,
  495. num_classes,
  496. weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
  497. bias_attr=ParamAttr(),
  498. )
  499. def forward(self, x):
  500. x = self.inception_stem(x)
  501. for inception_block in self.inception_block_list:
  502. x = inception_block(x)
  503. if self.with_pool:
  504. x = self.avg_pool(x)
  505. if self.num_classes > 0:
  506. x = paddle.reshape(x, shape=[-1, 2048])
  507. x = self.dropout(x)
  508. x = self.fc(x)
  509. return x
  510. def inception_v3(pretrained=False, **kwargs):
  511. """Inception v3 model from
  512. `"Rethinking the Inception Architecture for Computer Vision" <https://arxiv.org/pdf/1512.00567.pdf>`_.
  513. Args:
  514. pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
  515. on ImageNet. Default: False.
  516. **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`InceptionV3 <api_paddle_vision_models_InceptionV3>`.
  517. Returns:
  518. :ref:`api_paddle_nn_Layer`. An instance of Inception v3 model.
  519. Examples:
  520. .. code-block:: python
  521. >>> import paddle
  522. >>> from paddle.vision.models import inception_v3
  523. >>> # Build model
  524. >>> model = inception_v3()
  525. >>> # Build model and load imagenet pretrained weight
  526. >>> # model = inception_v3(pretrained=True)
  527. >>> x = paddle.rand([1, 3, 299, 299])
  528. >>> out = model(x)
  529. >>> print(out.shape)
  530. [1, 1000]
  531. """
  532. model = InceptionV3(**kwargs)
  533. arch = "inception_v3"
  534. if pretrained:
  535. assert (
  536. arch in model_urls
  537. ), f"{arch} model do not have a pretrained model now, you should set pretrained=False"
  538. weight_path = get_weights_path_from_url(
  539. model_urls[arch][0], model_urls[arch][1]
  540. )
  541. param = paddle.load(weight_path)
  542. model.set_dict(param)
  543. return model