demo_lightglue_camera_position_single_window.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827
  1. #!/usr/bin/env python3
  2. # LightGlue demo with camera position tracking in reference image
  3. from pathlib import Path
  4. import argparse
  5. import cv2
  6. import matplotlib.cm as cm
  7. import torch
  8. import numpy as np
  9. import time
  10. from lightglue import LightGlue, SuperPoint
  11. from lightglue.utils import numpy_image_to_torch
  12. # 导入UDP JPEG接收器
  13. try:
  14. from udp_jpeg_receiver import UDPJPEGReceiver
  15. except ImportError:
  16. UDPJPEGReceiver = None
  17. torch.set_grad_enabled(False)
  18. class AverageTimer:
  19. """Class to help manage printing simple timing of code execution."""
  20. def __init__(self, smoothing=0.3, newline=False):
  21. self.smoothing = smoothing
  22. self.newline = newline
  23. self.times = {}
  24. self.will_print = {}
  25. self.reset()
  26. def reset(self):
  27. now = time.time()
  28. self.start = now
  29. self.last_time = now
  30. for name in self.will_print:
  31. self.will_print[name] = False
  32. def update(self, name='default'):
  33. now = time.time()
  34. dt = now - self.last_time
  35. if name in self.times:
  36. dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name]
  37. self.times[name] = dt
  38. self.will_print[name] = True
  39. self.last_time = now
  40. def print(self, text='Timer'):
  41. total = 0.
  42. print('[{}]'.format(text), end=' ')
  43. for key in self.times:
  44. val = self.times[key]
  45. if self.will_print[key]:
  46. print('%s=%.3f' % (key, val), end=' ')
  47. total += val
  48. print('total=%.3f sec {%.1f FPS}' % (total, 1./total), end=' ')
  49. if self.newline:
  50. print(flush=True)
  51. else:
  52. print(end='\r', flush=True)
  53. self.reset()
  54. class VideoStreamer:
  55. """Class to help with reading images from a video stream."""
  56. def __init__(self, source, resize, skip, image_glob, max_length=1000000):
  57. self.source = source
  58. self.skip = skip
  59. self.max_length = max_length
  60. self.resize = resize
  61. self.i = 0
  62. self.cap = None
  63. self.is_ip_camera = False
  64. self.is_udp_jpeg = False
  65. self.udp_receiver = None
  66. self._is_digit_source = isinstance(source, int) or (isinstance(source, str) and source.isdigit())
  67. # 检测UDP透传JPEG模式
  68. if isinstance(source, str) and source.startswith('udp://'):
  69. if UDPJPEGReceiver is None:
  70. raise ImportError("UDPJPEGReceiver not available. Make sure udp_jpeg_receiver.py exists.")
  71. # 解析UDP地址:udp://host:port 或 udp://:port
  72. parts = source.replace('udp://', '').split(':')
  73. if len(parts) == 2:
  74. host = parts[0] if parts[0] else '0.0.0.0'
  75. port = int(parts[1])
  76. else:
  77. host = '0.0.0.0'
  78. port = int(parts[0])
  79. # 验证host是否是本机地址,如果不是则使用0.0.0.0
  80. import socket as sock
  81. try:
  82. # 尝试绑定到指定地址
  83. test_socket = sock.socket(sock.AF_INET, sock.SOCK_DGRAM)
  84. test_socket.bind((host, 0)) # 使用端口0测试
  85. test_socket.close()
  86. # 如果成功,说明是本机地址
  87. except OSError:
  88. # 绑定失败,说明不是本机地址,使用0.0.0.0
  89. print(f"Warning: {host} is not a local address, using 0.0.0.0 instead")
  90. host = '0.0.0.0'
  91. self.is_udp_jpeg = True
  92. self.udp_receiver = UDPJPEGReceiver(host=host, port=port)
  93. self.udp_receiver.start()
  94. print(f'UDP JPEG receiver initialized: {host}:{port}')
  95. elif Path(source).is_dir():
  96. self.listing = []
  97. for ext in image_glob:
  98. self.listing.extend(list(Path(source).glob(ext)))
  99. self.listing = self.listing[:self.max_length]
  100. self.max_length = len(self.listing)
  101. if self.max_length == 0:
  102. raise IOError('No images found in directory: {}'.format(source))
  103. print(f'Found {self.max_length} images in {source}')
  104. elif Path(source).exists():
  105. self.cap = cv2.VideoCapture(source)
  106. else:
  107. # Assume it's a webcam or IP camera
  108. # 对于IP摄像头,尝试使用FFMPEG后端以获得更好的控制
  109. if not self._is_digit_source and not Path(source).exists():
  110. # 这是IP摄像头URL
  111. self.is_ip_camera = True
  112. self.cap = cv2.VideoCapture(source, cv2.CAP_FFMPEG)
  113. else:
  114. self.cap = cv2.VideoCapture(int(source) if self._is_digit_source else source)
  115. # 优化IP摄像头网络流设置 - 减少延迟
  116. if self.is_ip_camera: # 如果是IP摄像头URL
  117. self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) # 最小化缓冲区
  118. self.cap.set(cv2.CAP_PROP_FPS, 30) # 尝试设置帧率
  119. self.cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) # 使用MJPEG编码(低延迟)
  120. def next_frame(self):
  121. # UDP JPEG模式
  122. if self.is_udp_jpeg:
  123. frame = self.udp_receiver.get_image(timeout=0.1)
  124. if frame is None:
  125. return None, False
  126. # Convert to grayscale
  127. if len(frame.shape) == 3:
  128. frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
  129. # 智能Resize:只在尺寸不匹配时才resize
  130. if len(self.resize) == 2:
  131. h, w = frame.shape[:2]
  132. # 如果尺寸不匹配才resize
  133. if w != self.resize[0] or h != self.resize[1]:
  134. frame = cv2.resize(frame, tuple(self.resize))
  135. elif len(self.resize) == 1 and self.resize[0] > 0:
  136. h, w = frame.shape[:2]
  137. max_dim = max(h, w)
  138. # 如果最大尺寸不匹配才resize
  139. if max_dim != self.resize[0]:
  140. scale = self.resize[0] / max_dim
  141. new_w, new_h = int(w * scale), int(h * scale)
  142. frame = cv2.resize(frame, (new_w, new_h))
  143. return frame, True
  144. if self.cap is not None:
  145. # 对于IP摄像头,清空缓冲区以获取最新帧
  146. if self.is_ip_camera:
  147. # 这是一个IP摄像头URL,清空缓冲区
  148. for _ in range(3): # 清空最多3帧旧数据(减少overhead)
  149. ret = self.cap.grab()
  150. if not ret:
  151. break
  152. ret, frame = self.cap.read()
  153. if not ret:
  154. return None, False
  155. # Convert to grayscale
  156. if len(frame.shape) == 3:
  157. frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
  158. else:
  159. if self.i >= self.max_length:
  160. return None, False
  161. image_file = self.listing[self.i]
  162. frame = cv2.imread(str(image_file), cv2.IMREAD_GRAYSCALE)
  163. if frame is None:
  164. print(f'Failed to load image: {image_file}')
  165. return None, False
  166. self.i += 1
  167. # Resize
  168. if len(self.resize) == 2:
  169. frame = cv2.resize(frame, tuple(self.resize))
  170. elif len(self.resize) == 1 and self.resize[0] > 0:
  171. h, w = frame.shape[:2]
  172. scale = self.resize[0] / max(h, w)
  173. new_w, new_h = int(w * scale), int(h * scale)
  174. frame = cv2.resize(frame, (new_w, new_h))
  175. # Skip frames
  176. if self.cap is not None:
  177. for _ in range(self.skip):
  178. ret, _ = self.cap.read()
  179. if not ret:
  180. return frame, True
  181. return frame, True
  182. def cleanup(self):
  183. if self.is_udp_jpeg and self.udp_receiver is not None:
  184. self.udp_receiver.stop()
  185. if self.cap is not None:
  186. self.cap.release()
  187. def frame2tensor(frame, device):
  188. """Convert frame to tensor."""
  189. if len(frame.shape) == 2:
  190. frame = frame[None, None] # Add batch and channel dimensions
  191. elif len(frame.shape) == 3:
  192. frame = frame[None] # Add batch dimension
  193. return torch.tensor(frame / 255., dtype=torch.float).to(device)
  194. def make_matching_plot_fast(image0, image1, kpts0, kpts1, mkpts0, mkpts1,
  195. color, text, path=None, show_keypoints=False,
  196. small_text=None, margin=10):
  197. """Create a visualization of matches between two images."""
  198. H0, W0 = image0.shape[:2]
  199. H1, W1 = image1.shape[:2]
  200. H, W = max(H0, H1), W0 + W1 + margin
  201. out = 255 * np.ones((H, W, 3), np.uint8)
  202. # Place images side by side
  203. out[:H0, :W0] = cv2.cvtColor(image0, cv2.COLOR_GRAY2BGR) if len(image0.shape) == 2 else image0
  204. out[:H1, W0+margin:] = cv2.cvtColor(image1, cv2.COLOR_GRAY2BGR) if len(image1.shape) == 2 else image1
  205. # Draw matches (lines only, no keypoints)
  206. if len(mkpts0) > 0:
  207. mkpts0_int = mkpts0.astype(int)
  208. mkpts1_int = mkpts1.astype(int)
  209. for i, ((x0, y0), (x1, y1)) in enumerate(zip(mkpts0_int, mkpts1_int)):
  210. c = (int(color[i][2] * 255), int(color[i][1] * 255), int(color[i][0] * 255))
  211. cv2.line(out, (x0, y0), (x1 + W0 + margin, y1), c, 1, lineType=cv2.LINE_AA)
  212. # No text information - clean display
  213. if path is not None:
  214. cv2.imwrite(str(path), out)
  215. return out
  216. def draw_camera_position_on_reference(reference_frame, camera_center_current, H, num_matches=0, min_matches=10, inliers_ratio=0.0):
  217. """
  218. 在参考图像上绘制摄像头当前位置的投影
  219. Args:
  220. reference_frame: 参考图像
  221. camera_center_current: 当前帧中摄像头的中心位置 (x, y)
  222. H: 单应性矩阵 (从参考图像到当前帧)
  223. num_matches: 当前匹配的特征点数量
  224. min_matches: 最小匹配数量阈值
  225. inliers_ratio: 内点比例
  226. Returns:
  227. 绘制了摄像头位置的参考图像
  228. """
  229. h_ref, w_ref = reference_frame.shape[:2]
  230. ref_colored = cv2.cvtColor(reference_frame.copy(), cv2.COLOR_GRAY2BGR)
  231. center_ref_int = (int(w_ref // 2), int(h_ref // 2))
  232. # 绘制参考图像中心(绿色十字)
  233. cv2.circle(ref_colored, center_ref_int, 15, (0, 255, 0), 2)
  234. cv2.line(ref_colored, (center_ref_int[0]-20, center_ref_int[1]),
  235. (center_ref_int[0]+20, center_ref_int[1]), (0, 255, 0), 3)
  236. cv2.line(ref_colored, (center_ref_int[0], center_ref_int[1]-20),
  237. (center_ref_int[0], center_ref_int[1]+20), (0, 255, 0), 3)
  238. # 检查匹配数量是否足够
  239. if H is None or num_matches < min_matches:
  240. # 匹配数量不足,不绘制摄像头位置
  241. if num_matches < min_matches:
  242. status_text = f"Insufficient matches: {num_matches}/{min_matches}"
  243. cv2.putText(ref_colored, status_text,
  244. (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
  245. cv2.putText(ref_colored, "Camera position not available",
  246. (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
  247. else:
  248. cv2.putText(ref_colored, "Reference Center",
  249. (center_ref_int[0] + 25, center_ref_int[1] - 10),
  250. cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
  251. return ref_colored
  252. # 匹配数量足够,计算摄像头位置
  253. try:
  254. H_inv = np.linalg.inv(H)
  255. camera_center_ref = cv2.perspectiveTransform(
  256. np.array([[camera_center_current]], dtype=np.float32).reshape(-1, 1, 2),
  257. H_inv
  258. )[0, 0]
  259. # 确保投影点在图像范围内
  260. camera_center_ref = np.clip(camera_center_ref, [0, 0], [w_ref-1, h_ref-1])
  261. # 绘制摄像头当前位置(红色十字)
  262. camera_pos_int = (int(camera_center_ref[0]), int(camera_center_ref[1]))
  263. cv2.circle(ref_colored, camera_pos_int, 12, (0, 0, 255), 2)
  264. cv2.line(ref_colored, (camera_pos_int[0]-15, camera_pos_int[1]),
  265. (camera_pos_int[0]+15, camera_pos_int[1]), (0, 0, 255), 3)
  266. cv2.line(ref_colored, (camera_pos_int[0], camera_pos_int[1]-15),
  267. (camera_pos_int[0], camera_pos_int[1]+15), (0, 0, 255), 3)
  268. # 绘制连接线
  269. cv2.line(ref_colored, center_ref_int, camera_pos_int, (255, 0, 255), 2)
  270. # 添加标签
  271. cv2.putText(ref_colored, "Reference Center",
  272. (center_ref_int[0] + 25, center_ref_int[1] - 10),
  273. cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
  274. cv2.putText(ref_colored, "Camera Position",
  275. (camera_pos_int[0] + 25, camera_pos_int[1] - 10),
  276. cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
  277. # 添加内点比例信息
  278. cv2.putText(ref_colored, f"Matches: {num_matches}",
  279. (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
  280. cv2.putText(ref_colored, f"Inliers: {inliers_ratio:.1%}",
  281. (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
  282. return ref_colored
  283. except np.linalg.LinAlgError:
  284. # 单应性矩阵不可逆
  285. cv2.putText(ref_colored, "Reference Center (Matrix Error)",
  286. (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
  287. cv2.putText(ref_colored, "Camera position not available",
  288. (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
  289. return ref_colored
  290. if __name__ == '__main__':
  291. parser = argparse.ArgumentParser(
  292. description='LightGlue demo with camera position tracking',
  293. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  294. parser.add_argument(
  295. '--input', type=str, default='0',
  296. help='ID of a USB webcam, URL of an IP camera, '
  297. 'UDP stream (udp://host:port), '
  298. 'or path to an image directory or movie file')
  299. parser.add_argument(
  300. '--reference_image', type=str, default=None,
  301. help='Path to reference image to match against (if None, use first frame)')
  302. parser.add_argument(
  303. '--output_dir', type=str, default=None,
  304. help='Directory where to write output frames (If None, no output)')
  305. parser.add_argument(
  306. '--image_glob', type=str, nargs='+', default=['*.png', '*.jpg', '*.jpeg'],
  307. help='Glob if a directory of images is specified')
  308. parser.add_argument(
  309. '--skip', type=int, default=1,
  310. help='Images to skip if input is a movie or directory')
  311. parser.add_argument(
  312. '--max_length', type=int, default=1000000,
  313. help='Maximum length if input is a movie or directory')
  314. parser.add_argument(
  315. '--resize', type=int, nargs='+', default=[640, 480],
  316. help='Resize the input image before running inference. If two numbers, '
  317. 'resize to the exact dimensions, if one number, resize the max '
  318. 'dimension, if -1, do not resize')
  319. parser.add_argument(
  320. '--max_keypoints', type=int, default=2048,
  321. help='Maximum number of keypoints detected by SuperPoint')
  322. parser.add_argument(
  323. '--keypoint_threshold', type=float, default=0.005,
  324. help='SuperPoint keypoint detector confidence threshold')
  325. parser.add_argument(
  326. '--nms_radius', type=int, default=4,
  327. help='SuperPoint Non Maximum Suppression (NMS) radius')
  328. parser.add_argument(
  329. '--match_threshold', type=float, default=0.2,
  330. help='LightGlue match threshold')
  331. parser.add_argument(
  332. '--show_keypoints', action='store_true',
  333. help='Show the detected keypoints')
  334. parser.add_argument(
  335. '--no_display', action='store_true',
  336. help='Do not display images to screen. Useful if running remotely')
  337. parser.add_argument(
  338. '--force_cpu', action='store_true',
  339. help='Force pytorch to run in CPU mode.')
  340. parser.add_argument(
  341. '--min_matches', type=int, default=10,
  342. help='Minimum number of matches to compute homography')
  343. parser.add_argument(
  344. '--flip_horizontal', action='store_true',
  345. help='Flip camera feed horizontally (mirror)')
  346. parser.add_argument(
  347. '--flip_vertical', action='store_true',
  348. help='Flip camera feed vertically')
  349. parser.add_argument(
  350. '--rotate', type=int, default=0, choices=[0, 90, 180, 270],
  351. help='Rotate camera feed (0, 90, 180, 270 degrees clockwise)')
  352. # LightGlue specific parameters
  353. parser.add_argument(
  354. '--depth_confidence', type=float, default=0.95,
  355. help='LightGlue depth confidence for early stopping (-1 to disable)')
  356. parser.add_argument(
  357. '--width_confidence', type=float, default=0.99,
  358. help='LightGlue width confidence for point pruning (-1 to disable)')
  359. parser.add_argument(
  360. '--no_ui', action='store_true',
  361. help='Disable UI interface and run demo directly')
  362. opt = parser.parse_args()
  363. # Hide console output when launched from UI
  364. if opt.no_ui:
  365. import os
  366. import sys
  367. # Redirect stdout and stderr to suppress console output
  368. sys.stdout = open(os.devnull, 'w')
  369. sys.stderr = open(os.devnull, 'w')
  370. if len(opt.resize) == 2 and opt.resize[1] == -1:
  371. opt.resize = opt.resize[0:1]
  372. if len(opt.resize) == 2:
  373. print('Will resize to {}x{} (WxH)'.format(
  374. opt.resize[0], opt.resize[1]))
  375. elif len(opt.resize) == 1 and opt.resize[0] > 0:
  376. print('Will resize max dimension to {}'.format(opt.resize[0]))
  377. elif len(opt.resize) == 1:
  378. print('Will not resize images')
  379. else:
  380. raise ValueError('Cannot specify more than two integers for --resize')
  381. device = 'cuda' if torch.cuda.is_available() and not opt.force_cpu else 'cpu'
  382. print('Running inference on device \"{}\"'.format(device))
  383. # Initialize LightGlue and SuperPoint
  384. extractor = SuperPoint(
  385. max_num_keypoints=opt.max_keypoints,
  386. detection_threshold=opt.keypoint_threshold,
  387. nms_radius=opt.nms_radius
  388. ).eval().to(device)
  389. matcher = LightGlue(
  390. features='superpoint',
  391. depth_confidence=opt.depth_confidence,
  392. width_confidence=opt.width_confidence,
  393. filter_threshold=opt.match_threshold
  394. ).eval().to(device)
  395. print('Loaded SuperPoint and LightGlue models')
  396. # Load reference image if provided
  397. if opt.reference_image is not None:
  398. print(f'==> Loading reference image: {opt.reference_image}')
  399. ref_image = cv2.imread(opt.reference_image, cv2.IMREAD_GRAYSCALE)
  400. if ref_image is None:
  401. raise IOError(f'Cannot load reference image: {opt.reference_image}')
  402. # Resize reference image
  403. h, w = ref_image.shape[:2]
  404. if len(opt.resize) == 2:
  405. ref_image = cv2.resize(ref_image, tuple(opt.resize))
  406. elif len(opt.resize) == 1 and opt.resize[0] > 0:
  407. scale = opt.resize[0] / max(h, w)
  408. new_w, new_h = int(w * scale), int(h * scale)
  409. ref_image = cv2.resize(ref_image, (new_w, new_h))
  410. # Extract features from reference image
  411. ref_tensor = frame2tensor(ref_image, device)
  412. last_data = extractor({'image': ref_tensor})
  413. last_frame = ref_image
  414. last_image_id = 0
  415. print(f'==> Reference image loaded: {ref_image.shape}')
  416. else:
  417. # Use first frame from video stream as reference
  418. vs = VideoStreamer(opt.input, opt.resize, opt.skip,
  419. opt.image_glob, opt.max_length)
  420. frame, ret = vs.next_frame()
  421. assert ret, 'Error when reading the first frame (try different --input?)'
  422. # Apply rotation and flipping to first frame if requested
  423. if opt.rotate == 90:
  424. frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
  425. elif opt.rotate == 180:
  426. frame = cv2.rotate(frame, cv2.ROTATE_180)
  427. elif opt.rotate == 270:
  428. frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
  429. if opt.flip_horizontal:
  430. frame = cv2.flip(frame, 1)
  431. if opt.flip_vertical:
  432. frame = cv2.flip(frame, 0)
  433. frame_tensor = frame2tensor(frame, device)
  434. last_data = extractor({'image': frame_tensor})
  435. last_frame = frame
  436. last_image_id = 0
  437. # Initialize video streamer if not already done
  438. if opt.reference_image is not None:
  439. vs = VideoStreamer(opt.input, opt.resize, opt.skip,
  440. opt.image_glob, opt.max_length)
  441. # 打印IP摄像头连接状态信息
  442. if hasattr(vs, 'cap') and vs.cap is not None:
  443. if isinstance(vs.cap, cv2.VideoCapture) and not opt.input.isdigit():
  444. actual_fps = vs.cap.get(cv2.CAP_PROP_FPS)
  445. actual_buf = vs.cap.get(cv2.CAP_PROP_BUFFERSIZE)
  446. print(f'IP Camera configured - FPS: {actual_fps:.1f}, Buffer: {actual_buf}')
  447. # Store reference image dimensions for bounding box
  448. h0, w0 = last_frame.shape[:2]
  449. if opt.output_dir is not None:
  450. print('==> Will write outputs to {}'.format(opt.output_dir))
  451. Path(opt.output_dir).mkdir(exist_ok=True)
  452. # Create windows to display the demo.
  453. # Only show Camera Position in Reference window
  454. if opt.no_display:
  455. print('Skipping visualization, will not show a GUI.')
  456. else:
  457. cv2.namedWindow('Camera Position in Reference', cv2.WINDOW_NORMAL)
  458. cv2.resizeWindow('Camera Position in Reference', 640, 480)
  459. # Print the keyboard help menu.
  460. print('==> Keyboard control:\n'
  461. '\tn: select the current frame as the anchor\n'
  462. '\te/r: increase/decrease the keypoint confidence threshold\n'
  463. '\td: decrease the match filtering threshold\n'
  464. '\tf: toggle FPS display\n'
  465. '\tk: toggle the visualization of keypoints\n'
  466. '\tq: quit')
  467. timer = AverageTimer()
  468. fps_display = 0.0 # For displaying FPS
  469. last_time = time.time()
  470. original_size = None # To store original frame size before resize
  471. show_fps = False # Toggle for FPS display
  472. while True:
  473. frame, ret = vs.next_frame()
  474. if not ret:
  475. print('Finished demo_lightglue_camera_position.py')
  476. break
  477. # Get original size before any transformation
  478. if original_size is None and hasattr(vs, 'cap') and vs.cap:
  479. # Try to get from video capture properties
  480. if isinstance(vs.cap, cv2.VideoCapture):
  481. orig_w = int(vs.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  482. orig_h = int(vs.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  483. if orig_w > 0 and orig_h > 0:
  484. original_size = (orig_w, orig_h)
  485. # Apply rotation and flipping if requested
  486. if opt.rotate == 90:
  487. frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
  488. if original_size:
  489. original_size = (original_size[1], original_size[0]) # Swap for rotation
  490. elif opt.rotate == 180:
  491. frame = cv2.rotate(frame, cv2.ROTATE_180)
  492. elif opt.rotate == 270:
  493. frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
  494. if original_size:
  495. original_size = (original_size[1], original_size[0]) # Swap for rotation
  496. if opt.flip_horizontal:
  497. frame = cv2.flip(frame, 1) # 1 means horizontal flip
  498. if opt.flip_vertical:
  499. frame = cv2.flip(frame, 0) # 0 means vertical flip
  500. timer.update('data')
  501. stem0, stem1 = last_image_id, vs.i - 1 if hasattr(vs, 'i') else 0
  502. # Extract features from current frame
  503. frame_tensor = frame2tensor(frame, device)
  504. curr_data = extractor({'image': frame_tensor})
  505. # Match features
  506. matches01 = matcher({'image0': last_data, 'image1': curr_data})
  507. # Get keypoints and matches
  508. kpts0 = last_data['keypoints'][0].cpu().numpy()
  509. kpts1 = curr_data['keypoints'][0].cpu().numpy()
  510. matches = matches01['matches0'][0].cpu().numpy()
  511. confidence = matches01['matching_scores0'][0].cpu().numpy()
  512. timer.update('forward')
  513. # Calculate FPS
  514. current_time = time.time()
  515. time_diff = current_time - last_time
  516. if time_diff > 0:
  517. fps_display = 0.9 * fps_display + 0.1 * (1.0 / time_diff) # Smoothed FPS
  518. last_time = current_time
  519. # Extract valid matches
  520. valid = matches > -1
  521. mkpts0 = kpts0[valid]
  522. mkpts1 = kpts1[matches[valid]]
  523. mconf = confidence[valid]
  524. color = cm.jet(mconf)
  525. # Compute homography and draw bounding box
  526. box_color = (0, 255, 0) # Green
  527. num_matches = len(mkpts0)
  528. H = None
  529. # Initialize variables for stability (using global variables)
  530. global last_good_H, last_good_camera_pos, last_camera_pos, smooth_alpha
  531. if 'last_good_H' not in globals():
  532. last_good_H = None
  533. last_good_camera_pos = None
  534. last_camera_pos = None
  535. smooth_alpha = 0.7 # 平滑系数,越大越平滑
  536. if num_matches >= opt.min_matches:
  537. # Compute homography
  538. H, mask = cv2.findHomography(mkpts0, mkpts1, cv2.RANSAC, 5.0)
  539. if H is not None:
  540. # Calculate inliers ratio
  541. inliers_count = np.sum(mask)
  542. inliers_ratio = inliers_count / num_matches
  543. # Quality check for stability
  544. quality_good = inliers_ratio >= 0.25 and num_matches >= 10
  545. # Print inliers ratio to console with quality indicator (ASCII only for Windows consoles)
  546. quality_indicator = "OK" if quality_good else "WARN"
  547. print(f"[Homography] {quality_indicator} Total matches: {num_matches}, Inliers: {inliers_count}, Inliers ratio: {inliers_ratio:.2%}")
  548. if quality_good:
  549. # Quality is good, use current homography
  550. last_good_H = H.copy()
  551. current_H = H
  552. box_color = (0, 255, 0) # Green
  553. else:
  554. # Quality is poor, use last good homography if available
  555. if last_good_H is not None:
  556. current_H = last_good_H
  557. box_color = (255, 165, 0) # Orange (using fallback)
  558. print(f"[Homography] Using fallback matrix (quality too low)")
  559. else:
  560. current_H = H
  561. box_color = (0, 255, 255) # Yellow (first frame)
  562. # Define corners of the reference image
  563. h0, w0 = last_frame.shape[:2]
  564. corners_ref = np.float32([[0, 0], [w0, 0], [w0, h0], [0, h0]]).reshape(-1, 1, 2)
  565. # Transform corners to current frame
  566. corners_curr = cv2.perspectiveTransform(corners_ref, current_H)
  567. # Draw bounding box on current frame
  568. h1, w1 = frame.shape[:2]
  569. frame_with_box = cv2.cvtColor(frame.copy(), cv2.COLOR_GRAY2BGR)
  570. frame_with_box = cv2.polylines(frame_with_box, [np.int32(corners_curr)],
  571. True, box_color, 3, cv2.LINE_AA)
  572. # Add text with inliers ratio and quality
  573. quality_text = "Good" if quality_good else "Fallback"
  574. cv2.putText(frame_with_box, f'{quality_text}! Matches: {num_matches} | Inliers: {inliers_ratio:.1%}',
  575. (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, box_color, 2)
  576. else:
  577. # Homography computation failed
  578. print(f"[Homography] Failed - Total matches: {num_matches}")
  579. frame_with_box = cv2.cvtColor(frame.copy(), cv2.COLOR_GRAY2BGR)
  580. cv2.putText(frame_with_box, f'Tracking... Matches: {num_matches}',
  581. (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
  582. current_H = None
  583. else:
  584. frame_with_box = cv2.cvtColor(frame.copy(), cv2.COLOR_GRAY2BGR)
  585. cv2.putText(frame_with_box, f'Searching... Matches: {num_matches}',
  586. (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
  587. # Add FPS display in bottom right corner
  588. h_box, w_box = frame_with_box.shape[:2]
  589. fps_text = f'FPS: {fps_display:.1f}'
  590. cv2.putText(frame_with_box, fps_text,
  591. (w_box - 120, h_box - 15), cv2.FONT_HERSHEY_SIMPLEX,
  592. 0.6, (255, 255, 255), 2)
  593. # Draw red crosshair in the center of Object Detection window
  594. center_x, center_y = w_box // 2, h_box // 2
  595. crosshair_size = 20
  596. cv2.line(frame_with_box,
  597. (center_x - crosshair_size, center_y),
  598. (center_x + crosshair_size, center_y),
  599. (0, 0, 255), 4, cv2.LINE_AA) # Red horizontal line
  600. cv2.line(frame_with_box,
  601. (center_x, center_y - crosshair_size),
  602. (center_x, center_y + crosshair_size),
  603. (0, 0, 255), 4, cv2.LINE_AA) # Red vertical line
  604. # 计算当前帧中摄像头的中心位置
  605. h_curr, w_curr = frame.shape[:2]
  606. camera_center_current = (w_curr // 2, h_curr // 2)
  607. # 在参考图像上绘制摄像头位置
  608. inliers_ratio = 0.0
  609. if 'current_H' in locals() and current_H is not None and num_matches >= opt.min_matches:
  610. inliers_count = np.sum(mask) if 'mask' in locals() else 0
  611. inliers_ratio = inliers_count / num_matches if num_matches > 0 else 0.0
  612. # 计算摄像头在参考图像中的位置
  613. try:
  614. H_inv = np.linalg.inv(current_H)
  615. camera_center_ref = cv2.perspectiveTransform(
  616. np.array([[camera_center_current]], dtype=np.float32).reshape(-1, 1, 2),
  617. H_inv
  618. )[0, 0]
  619. # 应用平滑滤波
  620. if last_camera_pos is not None:
  621. # 指数平滑
  622. camera_center_ref = smooth_alpha * last_camera_pos + (1 - smooth_alpha) * camera_center_ref
  623. last_camera_pos = camera_center_ref.copy()
  624. # 更新camera_center_current为平滑后的位置
  625. camera_center_current_smooth = cv2.perspectiveTransform(
  626. np.array([[camera_center_ref]], dtype=np.float32).reshape(-1, 1, 2),
  627. current_H
  628. )[0, 0]
  629. except np.linalg.LinAlgError:
  630. # 矩阵不可逆,使用原始位置
  631. pass
  632. reference_with_camera_pos = draw_camera_position_on_reference(
  633. last_frame, camera_center_current, current_H if 'current_H' in locals() else H,
  634. num_matches, opt.min_matches, inliers_ratio
  635. )
  636. # Add FPS display to Camera Position window if enabled
  637. if show_fps:
  638. fps_text = f'FPS: {fps_display:.1f}'
  639. cv2.putText(reference_with_camera_pos, fps_text,
  640. (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
  641. text = [
  642. 'LightGlue with Camera Position Tracking',
  643. 'Keypoints: {}:{}'.format(len(kpts0), len(kpts1)),
  644. 'Matches: {}'.format(len(mkpts0))
  645. ]
  646. small_text = [
  647. 'Keypoint Threshold: {:.4f}'.format(opt.keypoint_threshold),
  648. 'Match Threshold: {:.2f}'.format(opt.match_threshold),
  649. 'Image Pair: {:06}:{:06}'.format(stem0, stem1),
  650. 'Stopped at layer: {}/{}'.format(matches01['stop'], 9)
  651. ]
  652. # Create visualization with matches
  653. out = make_matching_plot_fast(
  654. last_frame, frame, kpts0, kpts1, mkpts0, mkpts1, color, text,
  655. path=None, show_keypoints=opt.show_keypoints, small_text=small_text)
  656. # No additional text or elements added to out image
  657. # Show only Camera Position in Reference window
  658. if not opt.no_display:
  659. cv2.imshow('Camera Position in Reference', reference_with_camera_pos)
  660. key = chr(cv2.waitKey(1) & 0xFF)
  661. else:
  662. key = ''
  663. # Handle keyboard input for both modes
  664. if key == 'q':
  665. vs.cleanup()
  666. print('Exiting (via q) demo_lightglue_camera_position.py')
  667. break
  668. elif key == 'n': # set the current frame as anchor
  669. last_data = curr_data
  670. last_frame = frame
  671. last_image_id = stem1
  672. elif key in ['e', 'r']:
  673. # Increase/decrease keypoint threshold by 10% each keypress.
  674. d = 0.1 * (-1 if key == 'e' else 1)
  675. opt.keypoint_threshold = min(max(
  676. 0.0001, opt.keypoint_threshold * (1 + d)), 1)
  677. extractor.conf.detection_threshold = opt.keypoint_threshold
  678. print('\nChanged the keypoint threshold to {:.4f}'.format(
  679. opt.keypoint_threshold))
  680. elif key == 'd':
  681. # Decrease match threshold by 0.05
  682. opt.match_threshold = min(max(
  683. 0.05, opt.match_threshold - 0.05), .95)
  684. matcher.conf.filter_threshold = opt.match_threshold
  685. print('\nChanged the match threshold to {:.2f}'.format(
  686. opt.match_threshold))
  687. elif key == 'f':
  688. # Toggle FPS display
  689. show_fps = not show_fps
  690. elif key == 'k':
  691. opt.show_keypoints = not opt.show_keypoints
  692. timer.update('viz')
  693. timer.print('LightGlue')
  694. if opt.output_dir is not None:
  695. stem = 'matches_{:06}_{:06}'.format(stem0, stem1)
  696. out_file = str(Path(opt.output_dir, stem + '.png'))
  697. print('\nWriting image to {}'.format(out_file))
  698. cv2.imwrite(out_file, out)
  699. # Also save detection result
  700. det_file = str(Path(opt.output_dir, 'detection_' + stem + '.png'))
  701. cv2.imwrite(det_file, frame_with_box)
  702. # Save camera position result
  703. cam_file = str(Path(opt.output_dir, 'camera_pos_' + stem + '.png'))
  704. cv2.imwrite(cam_file, reference_with_camera_pos)
  705. cv2.destroyAllWindows()
  706. vs.cleanup()