#!/usr/bin/env python3
"""
TensorRT integration example.

Shows how to integrate TensorRT into demo_lightglue_camera_position_async.py.
"""
# Add the following code to demo_lightglue_camera_position_async.py:

# ===== 1. Add the import at the top of the file =====
"""
try:
    from tensorrt_wrapper import create_tensorrt_models
    TENSORRT_AVAILABLE = True
except ImportError:
    TENSORRT_AVAILABLE = False
    print("TensorRT not available, using PyTorch models")
"""
# ===== 2. Add the arguments to argparse =====
"""
parser.add_argument(
    "--use_tensorrt",
    action="store_true",
    help="Use TensorRT optimized models (requires torch-tensorrt)"
)
parser.add_argument(
    "--tensorrt_precision",
    type=str,
    default="fp16",
    choices=["fp32", "fp16", "int8"],
    help="TensorRT precision mode"
)
"""
# ===== 3. Add the TensorRT optimization after the models are loaded =====
"""
# 原始代码:
extractor = SuperPoint(...).eval().to(device)
matcher = LightGlue(...).eval().to(device)
# 添加 TensorRT 优化:
if opt.use_tensorrt and TENSORRT_AVAILABLE and device == "cuda":
    try:
        print("Compiling models with TensorRT...")
        print(f"Precision: {opt.tensorrt_precision}")
        print("This may take several minutes on first run...")

        extractor, matcher = create_tensorrt_models(
            extractor,
            matcher,
            precision=opt.tensorrt_precision
        )

        print("✓ TensorRT models compiled successfully")
        print("Note: Compiled models are cached for faster startup next time")
    except Exception as e:
        print(f"✗ Failed to compile with TensorRT: {e}")
        print("Falling back to PyTorch models")
        import traceback
        traceback.print_exc()
else:
    if opt.use_tensorrt:
        print("TensorRT requested but not available, using PyTorch models")
"""
# ===== 4. Complete integration example =====
def integrate_tensorrt_into_demo():
    """Return the code snippet that wires TensorRT into the demo script.

    The snippet is meant to be copied into
    demo_lightglue_camera_position_async.py. It has three parts, each
    introduced by a header comment inside the snippet itself:

    1. a guarded import of ``create_tensorrt_models`` that sets
       ``TENSORRT_AVAILABLE``;
    2. the ``--use_tensorrt`` and ``--tensorrt_precision`` argparse options;
    3. the block that compiles the loaded models with TensorRT, keeping the
       PyTorch models if compilation raises, and printing a warning when
       TensorRT was requested but cannot be used.

    The snippet text is kept at column 0 and properly indented so that it is
    syntactically valid Python when pasted at module level.

    Returns:
        str: The integration snippet; it starts and ends with a newline.
    """
    # The header comments inside the snippet are emitted text, so they are
    # left in the guide's original language.
    integration_code = '''
# ===== 在导入部分添加 =====
try:
    from tensorrt_wrapper import create_tensorrt_models
    TENSORRT_AVAILABLE = True
except ImportError:
    TENSORRT_AVAILABLE = False
# ===== 在 argparse 部分添加 =====
parser.add_argument(
    "--use_tensorrt",
    action="store_true",
    help="Use TensorRT optimized models (requires torch-tensorrt)"
)
parser.add_argument(
    "--tensorrt_precision",
    type=str,
    default="fp16",
    choices=["fp32", "fp16", "int8"],
    help="TensorRT precision mode"
)
# ===== 在模型加载后(约第338行)添加 =====
print("Loaded SuperPoint and LightGlue models")
# TensorRT 优化
if opt.use_tensorrt and TENSORRT_AVAILABLE and device == "cuda":
    try:
        print("="*60)
        print("Compiling models with TensorRT...")
        print(f"Precision: {opt.tensorrt_precision}")
        print("This may take several minutes on first run...")
        print("="*60)

        extractor, matcher = create_tensorrt_models(
            extractor,
            matcher,
            precision=opt.tensorrt_precision
        )

        print("="*60)
        print("✓ TensorRT models compiled successfully")
        print("Note: Compiled models are cached for faster startup next time")
        print("="*60)
    except Exception as e:
        print(f"✗ Failed to compile with TensorRT: {e}")
        print("Falling back to PyTorch models")
        import traceback
        traceback.print_exc()
elif opt.use_tensorrt:
    if not TENSORRT_AVAILABLE:
        print("Warning: TensorRT requested but torch-tensorrt not installed")
        print("Install with: pip install torch-tensorrt")
    elif device != "cuda":
        print("Warning: TensorRT requires CUDA, but running on CPU")
'''

    return integration_code
# ===== Usage =====
# Human-readable usage notes printed by the script's entry point. The text is
# user-facing output, so it stays in the guide's original language. `\\` in
# this non-raw string yields a single backslash, i.e. a shell line
# continuation in the printed command.
usage_instructions = """
使用方法:
1. 安装依赖:
   pip install torch-tensorrt
2. 运行程序(首次运行会编译模型,需要几分钟):
   python demo_lightglue_camera_position_async.py \\
       --input "udp://0.0.0.0:12346" \\
       --max_keypoints 128 \\
       --use_fp16 \\
       --use_tensorrt \\
       --tensorrt_precision fp16
3. 第二次运行会直接加载编译好的模型(很快)
4. 性能对比:
   - PyTorch FP16: ~22 FPS
   - TensorRT FP16: ~35-45 FPS (预期)
   - TensorRT INT8: ~50-60 FPS (预期,但精度可能下降)
注意事项:
- 首次编译需要较长时间(5-15分钟)
- 编译后的模型会保存在当前目录(superpoint_fp16.ts, lightglue_fp16.ts)
- 如果模型结构改变,需要删除缓存文件重新编译
- INT8 量化可能需要校准数据
"""
if __name__ == "__main__":
    # When run as a script, print the integration snippet followed by the
    # usage notes, each under a 60-character banner.
    print("="*60)
    print("TensorRT 集成指南")
    print("="*60)
    print(integrate_tensorrt_into_demo())
    # The leading newline separates the usage banner from the snippet above.
    print("\n" + "="*60)
    print("使用说明")
    print("="*60)
    print(usage_instructions)