#!/usr/bin/env python3
"""TensorRT integration example.

Shows how to integrate TensorRT into
demo_lightglue_camera_position_async.py.

The module holds no executable integration logic of its own: the snippets
below are reference text meant to be copied into the demo script. Running
this file as a script prints the complete snippet followed by the usage
instructions.
"""

# Add the following code to demo_lightglue_camera_position_async.py:

# ===== 1. Add the import at the top of the file =====
"""
try:
    from tensorrt_wrapper import create_tensorrt_models
    TENSORRT_AVAILABLE = True
except ImportError:
    TENSORRT_AVAILABLE = False
    print("TensorRT not available, using PyTorch models")
"""

# ===== 2. Add the arguments to argparse =====
"""
parser.add_argument(
    "--use_tensorrt",
    action="store_true",
    help="Use TensorRT optimized models (requires torch-tensorrt)"
)
parser.add_argument(
    "--tensorrt_precision",
    type=str,
    default="fp16",
    choices=["fp32", "fp16", "int8"],
    help="TensorRT precision mode"
)
"""

# ===== 3. Add the TensorRT optimization after the models are loaded =====
"""
# 原始代码:
extractor = SuperPoint(...).eval().to(device)
matcher = LightGlue(...).eval().to(device)

# 添加 TensorRT 优化:
if opt.use_tensorrt and TENSORRT_AVAILABLE and device == "cuda":
    try:
        print("Compiling models with TensorRT...")
        print(f"Precision: {opt.tensorrt_precision}")
        print("This may take several minutes on first run...")

        extractor, matcher = create_tensorrt_models(
            extractor,
            matcher,
            precision=opt.tensorrt_precision
        )

        print("✓ TensorRT models compiled successfully")
        print("Note: Compiled models are cached for faster startup next time")
    except Exception as e:
        print(f"✗ Failed to compile with TensorRT: {e}")
        print("Falling back to PyTorch models")
        import traceback
        traceback.print_exc()
else:
    if opt.use_tensorrt:
        print("TensorRT requested but not available, using PyTorch models")
"""


# ===== 4. Complete integration example =====
def integrate_tensorrt_into_demo() -> str:
    """Return the complete integration snippet as source text.

    The returned text is meant to be pasted into the matching places of
    demo_lightglue_camera_position_async.py (import section, argparse
    section, and right after the models are loaded). It is kept flush-left
    so that it is syntactically valid Python on its own; the names it uses
    (``parser``, ``opt``, ``device``, ``extractor``, ``matcher``) are
    expected to exist in the target script, not here.

    Returns:
        The snippet as a single multi-line string.
    """
    # NOTE: this literal is user-facing output; its content (including the
    # non-English section markers) is intentionally left as authored.
    integration_code = '''
# ===== 在导入部分添加 =====
try:
    from tensorrt_wrapper import create_tensorrt_models
    TENSORRT_AVAILABLE = True
except ImportError:
    TENSORRT_AVAILABLE = False

# ===== 在 argparse 部分添加 =====
parser.add_argument(
    "--use_tensorrt",
    action="store_true",
    help="Use TensorRT optimized models (requires torch-tensorrt)"
)
parser.add_argument(
    "--tensorrt_precision",
    type=str,
    default="fp16",
    choices=["fp32", "fp16", "int8"],
    help="TensorRT precision mode"
)

# ===== 在模型加载后(约第338行)添加 =====
print("Loaded SuperPoint and LightGlue models")

# TensorRT 优化
if opt.use_tensorrt and TENSORRT_AVAILABLE and device == "cuda":
    try:
        print("="*60)
        print("Compiling models with TensorRT...")
        print(f"Precision: {opt.tensorrt_precision}")
        print("This may take several minutes on first run...")
        print("="*60)

        extractor, matcher = create_tensorrt_models(
            extractor,
            matcher,
            precision=opt.tensorrt_precision
        )

        print("="*60)
        print("✓ TensorRT models compiled successfully")
        print("Note: Compiled models are cached for faster startup next time")
        print("="*60)
    except Exception as e:
        print(f"✗ Failed to compile with TensorRT: {e}")
        print("Falling back to PyTorch models")
        import traceback
        traceback.print_exc()
elif opt.use_tensorrt:
    if not TENSORRT_AVAILABLE:
        print("Warning: TensorRT requested but torch-tensorrt not installed")
        print("Install with: pip install torch-tensorrt")
    elif device != "cuda":
        print("Warning: TensorRT requires CUDA, but running on CPU")
'''
    return integration_code


# ===== Usage =====
# User-facing text printed by main(). "\\" renders as a single backslash so
# the printed command shows shell line continuations.
usage_instructions = """
使用方法:

1. 安装依赖:
   pip install torch-tensorrt

2. 运行程序(首次运行会编译模型,需要几分钟):
   python demo_lightglue_camera_position_async.py \\
       --input "udp://0.0.0.0:12346" \\
       --max_keypoints 128 \\
       --use_fp16 \\
       --use_tensorrt \\
       --tensorrt_precision fp16

3. 第二次运行会直接加载编译好的模型(很快)

4. 性能对比:
   - PyTorch FP16: ~22 FPS
   - TensorRT FP16: ~35-45 FPS (预期)
   - TensorRT INT8: ~50-60 FPS (预期,但精度可能下降)

注意事项:
- 首次编译需要较长时间(5-15分钟)
- 编译后的模型会保存在当前目录(superpoint_fp16.ts, lightglue_fp16.ts)
- 如果模型结构改变,需要删除缓存文件重新编译
- INT8 量化可能需要校准数据
"""


def main() -> None:
    """Print the integration snippet, then the usage instructions."""
    print("="*60)
    print("TensorRT 集成指南")
    print("="*60)
    print(integrate_tensorrt_into_demo())
    print("\n" + "="*60)
    print("使用说明")
    print("="*60)
    print(usage_instructions)


if __name__ == "__main__":
    main()