common.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
  2. """
  3. Common modules
  4. """
  5. import json
  6. import math
  7. import platform
  8. import warnings
  9. from collections import OrderedDict, namedtuple
  10. from copy import copy
  11. from pathlib import Path
  12. import cv2
  13. import numpy as np
  14. import requests
  15. import torch
  16. import torch.nn as nn
  17. from PIL import Image
  18. from torch.cuda import amp
  19. from utils.yolov5_utils import make_divisible, initialize_weights, check_anchor_order, check_version, fuse_conv_and_bn
  20. def autopad(k, p=None): # kernel, padding
  21. # Pad to 'same'
  22. if p is None:
  23. p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
  24. return p
  25. class Conv(nn.Module):
  26. # Standard convolution
  27. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
  28. super().__init__()
  29. self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
  30. self.bn = nn.BatchNorm2d(c2)
  31. if isinstance(act, bool):
  32. self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
  33. elif isinstance(act, str):
  34. if act == 'leaky':
  35. self.act = nn.LeakyReLU(0.1, inplace=True)
  36. elif act == 'relu':
  37. self.act = nn.ReLU(inplace=True)
  38. else:
  39. self.act = None
  40. def forward(self, x):
  41. return self.act(self.bn(self.conv(x)))
  42. def forward_fuse(self, x):
  43. return self.act(self.conv(x))
  44. class DWConv(Conv):
  45. # Depth-wise convolution class
  46. def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
  47. super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
  48. class TransformerLayer(nn.Module):
  49. # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
  50. def __init__(self, c, num_heads):
  51. super().__init__()
  52. self.q = nn.Linear(c, c, bias=False)
  53. self.k = nn.Linear(c, c, bias=False)
  54. self.v = nn.Linear(c, c, bias=False)
  55. self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
  56. self.fc1 = nn.Linear(c, c, bias=False)
  57. self.fc2 = nn.Linear(c, c, bias=False)
  58. def forward(self, x):
  59. x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
  60. x = self.fc2(self.fc1(x)) + x
  61. return x
  62. class TransformerBlock(nn.Module):
  63. # Vision Transformer https://arxiv.org/abs/2010.11929
  64. def __init__(self, c1, c2, num_heads, num_layers):
  65. super().__init__()
  66. self.conv = None
  67. if c1 != c2:
  68. self.conv = Conv(c1, c2)
  69. self.linear = nn.Linear(c2, c2) # learnable position embedding
  70. self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
  71. self.c2 = c2
  72. def forward(self, x):
  73. if self.conv is not None:
  74. x = self.conv(x)
  75. b, _, w, h = x.shape
  76. p = x.flatten(2).permute(2, 0, 1)
  77. return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
  78. class Bottleneck(nn.Module):
  79. # Standard bottleneck
  80. def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, act=True): # ch_in, ch_out, shortcut, groups, expansion
  81. super().__init__()
  82. c_ = int(c2 * e) # hidden channels
  83. self.cv1 = Conv(c1, c_, 1, 1, act=act)
  84. self.cv2 = Conv(c_, c2, 3, 1, g=g, act=act)
  85. self.add = shortcut and c1 == c2
  86. def forward(self, x):
  87. return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
  88. class BottleneckCSP(nn.Module):
  89. # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
  90. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
  91. super().__init__()
  92. c_ = int(c2 * e) # hidden channels
  93. self.cv1 = Conv(c1, c_, 1, 1)
  94. self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
  95. self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
  96. self.cv4 = Conv(2 * c_, c2, 1, 1)
  97. self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
  98. self.act = nn.SiLU()
  99. self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
  100. def forward(self, x):
  101. y1 = self.cv3(self.m(self.cv1(x)))
  102. y2 = self.cv2(x)
  103. return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
  104. class C3(nn.Module):
  105. # CSP Bottleneck with 3 convolutions
  106. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, act=True): # ch_in, ch_out, number, shortcut, groups, expansion
  107. super().__init__()
  108. c_ = int(c2 * e) # hidden channels
  109. self.cv1 = Conv(c1, c_, 1, 1, act=act)
  110. self.cv2 = Conv(c1, c_, 1, 1, act=act)
  111. self.cv3 = Conv(2 * c_, c2, 1, act=act) # act=FReLU(c2)
  112. self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0, act=act) for _ in range(n)))
  113. # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
  114. def forward(self, x):
  115. return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
  116. class C3TR(C3):
  117. # C3 module with TransformerBlock()
  118. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
  119. super().__init__(c1, c2, n, shortcut, g, e)
  120. c_ = int(c2 * e)
  121. self.m = TransformerBlock(c_, c_, 4, n)
  122. class C3SPP(C3):
  123. # C3 module with SPP()
  124. def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
  125. super().__init__(c1, c2, n, shortcut, g, e)
  126. c_ = int(c2 * e)
  127. self.m = SPP(c_, c_, k)
  128. class C3Ghost(C3):
  129. # C3 module with GhostBottleneck()
  130. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
  131. super().__init__(c1, c2, n, shortcut, g, e)
  132. c_ = int(c2 * e) # hidden channels
  133. self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
  134. class SPP(nn.Module):
  135. # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
  136. def __init__(self, c1, c2, k=(5, 9, 13)):
  137. super().__init__()
  138. c_ = c1 // 2 # hidden channels
  139. self.cv1 = Conv(c1, c_, 1, 1)
  140. self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
  141. self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
  142. def forward(self, x):
  143. x = self.cv1(x)
  144. with warnings.catch_warnings():
  145. warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
  146. return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
  147. class SPPF(nn.Module):
  148. # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
  149. def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
  150. super().__init__()
  151. c_ = c1 // 2 # hidden channels
  152. self.cv1 = Conv(c1, c_, 1, 1)
  153. self.cv2 = Conv(c_ * 4, c2, 1, 1)
  154. self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
  155. def forward(self, x):
  156. x = self.cv1(x)
  157. with warnings.catch_warnings():
  158. warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
  159. y1 = self.m(x)
  160. y2 = self.m(y1)
  161. return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
  162. class Focus(nn.Module):
  163. # Focus wh information into c-space
  164. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
  165. super().__init__()
  166. self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
  167. # self.contract = Contract(gain=2)
  168. def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
  169. return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
  170. # return self.conv(self.contract(x))
  171. class GhostConv(nn.Module):
  172. # Ghost Convolution https://github.com/huawei-noah/ghostnet
  173. def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
  174. super().__init__()
  175. c_ = c2 // 2 # hidden channels
  176. self.cv1 = Conv(c1, c_, k, s, None, g, act)
  177. self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
  178. def forward(self, x):
  179. y = self.cv1(x)
  180. return torch.cat([y, self.cv2(y)], 1)
  181. class GhostBottleneck(nn.Module):
  182. # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
  183. def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
  184. super().__init__()
  185. c_ = c2 // 2
  186. self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
  187. DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
  188. GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
  189. self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
  190. Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
  191. def forward(self, x):
  192. return self.conv(x) + self.shortcut(x)
  193. class Contract(nn.Module):
  194. # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
  195. def __init__(self, gain=2):
  196. super().__init__()
  197. self.gain = gain
  198. def forward(self, x):
  199. b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
  200. s = self.gain
  201. x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
  202. x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
  203. return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
  204. class Expand(nn.Module):
  205. # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
  206. def __init__(self, gain=2):
  207. super().__init__()
  208. self.gain = gain
  209. def forward(self, x):
  210. b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
  211. s = self.gain
  212. x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
  213. x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
  214. return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
  215. class Concat(nn.Module):
  216. # Concatenate a list of tensors along dimension
  217. def __init__(self, dimension=1):
  218. super().__init__()
  219. self.d = dimension
  220. def forward(self, x):
  221. return torch.cat(x, self.d)
  222. class Classify(nn.Module):
  223. # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
  224. def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
  225. super().__init__()
  226. self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
  227. self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
  228. self.flat = nn.Flatten()
  229. def forward(self, x):
  230. z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
  231. return self.flat(self.conv(z)) # flatten to x(b,c2)