#!/usr/bin/env python3
"""
TensorRT 集成示例

展示如何在 demo_lightglue_camera_position_async.py 中集成 TensorRT
"""

# 在 demo_lightglue_camera_position_async.py 中添加以下代码：

# ===== 1. 在文件开头添加导入 =====
"""
try:
    from tensorrt_wrapper import create_tensorrt_models
    TENSORRT_AVAILABLE = True
except ImportError:
    TENSORRT_AVAILABLE = False
    print("TensorRT not available, using PyTorch models")
"""

# ===== 2. 在 argparse 中添加参数 =====
"""
parser.add_argument(
    "--use_tensorrt",
    action="store_true",
    help="Use TensorRT optimized models (requires torch-tensorrt)"
)
parser.add_argument(
    "--tensorrt_precision",
    type=str,
    default="fp16",
    choices=["fp32", "fp16", "int8"],
    help="TensorRT precision mode"
)
"""

# ===== 3. 在模型加载后添加 TensorRT 优化 =====
"""
# 原始代码：
extractor = SuperPoint(...).eval().to(device)
matcher = LightGlue(...).eval().to(device)

# 添加 TensorRT 优化：
if opt.use_tensorrt and TENSORRT_AVAILABLE and device == "cuda":
    try:
        print("Compiling models with TensorRT...")
        print(f"Precision: {opt.tensorrt_precision}")
        print("This may take several minutes on first run...")
        
        extractor, matcher = create_tensorrt_models(
            extractor,
            matcher,
            precision=opt.tensorrt_precision
        )
        
        print("✓ TensorRT models compiled successfully")
        print("Note: Compiled models are cached for faster startup next time")
    except Exception as e:
        print(f"✗ Failed to compile with TensorRT: {e}")
        print("Falling back to PyTorch models")
        import traceback
        traceback.print_exc()
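elif opt.use_tensorrt:
    if not TENSORRT_AVAILABLE:
        print("Warning: TensorRT requested but torch-tensorrt not installed")
        print("Install with: pip install torch-tensorrt")
    elif device != "cuda":
        print("Warning: TensorRT requires CUDA, but running on CPU")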
"""

# ===== 4. 完整集成示例 =====

def integrate_tensorrt_into_demo():
    """Return the complete TensorRT integration snippet as a single string.

    The text is assembled from consecutive pieces, each meant to be pasted
    into the matching place of demo_lightglue_camera_position_async.py:
    the import section, the argparse section, and the spot right after the
    models have been loaded.
    """
    # Whitespace-only line (eight spaces) that separates statements inside
    # the snippet's ``try`` body. It is built explicitly so the exact text
    # survives editors that strip trailing whitespace.
    indented_blank = " " * 8 + "\n"

    # Guarded import that records whether the TensorRT wrapper is usable.
    import_section = '''
# ===== 在导入部分添加 =====
try:
    from tensorrt_wrapper import create_tensorrt_models
    TENSORRT_AVAILABLE = True
except ImportError:
    TENSORRT_AVAILABLE = False
'''

    # Command-line switches that enable TensorRT and pick its precision.
    argparse_section = '''
# ===== 在 argparse 部分添加 =====
parser.add_argument(
    "--use_tensorrt",
    action="store_true",
    help="Use TensorRT optimized models (requires torch-tensorrt)"
)
parser.add_argument(
    "--tensorrt_precision",
    type=str,
    default="fp16",
    choices=["fp32", "fp16", "int8"],
    help="TensorRT precision mode"
)
'''

    # Opening of the optimisation step: condition, ``try`` and banner.
    compile_banner = '''
# ===== 在模型加载后（约第338行）添加 =====
print("Loaded SuperPoint and LightGlue models")

# TensorRT 优化
if opt.use_tensorrt and TENSORRT_AVAILABLE and device == "cuda":
    try:
        print("="*60)
        print("Compiling models with TensorRT...")
        print(f"Precision: {opt.tensorrt_precision}")
        print("This may take several minutes on first run...")
        print("="*60)
'''

    # The leading backslash-newline is a line continuation inside the
    # literal, so this piece starts directly with the indented call.
    compile_call = '''\
        extractor, matcher = create_tensorrt_models(
            extractor,
            matcher,
            precision=opt.tensorrt_precision
        )
'''

    # Success banner, failure fallback, and the "requested but unusable"
    # warnings.
    outcome_handling = '''\
        print("="*60)
        print("✓ TensorRT models compiled successfully")
        print("Note: Compiled models are cached for faster startup next time")
        print("="*60)
    except Exception as e:
        print(f"✗ Failed to compile with TensorRT: {e}")
        print("Falling back to PyTorch models")
        import traceback
        traceback.print_exc()
elif opt.use_tensorrt:
    if not TENSORRT_AVAILABLE:
        print("Warning: TensorRT requested but torch-tensorrt not installed")
        print("Install with: pip install torch-tensorrt")
    elif device != "cuda":
        print("Warning: TensorRT requires CUDA, but running on CPU")
'''

    return "".join(
        (
            import_section,
            argparse_section,
            compile_banner,
            indented_blank,
            compile_call,
            indented_blank,
            outcome_handling,
        )
    )


# ===== Usage =====

# Human-readable usage guide: install step, example command line, expected
# performance figures and caveats. The text is kept in Chinese because it is
# printed verbatim when this file is run as a script. Each "\\" in the
# literal is an escaped backslash, so the example command is displayed with a
# single trailing backslash per line. The figures marked "预期" are
# expectations stated by the guide, not measurements made by this file.
usage_instructions = """
使用方法：

1. 安装依赖：
   pip install torch-tensorrt

2. 运行程序（首次运行会编译模型，需要几分钟）：
   python demo_lightglue_camera_position_async.py \\
       --input "udp://0.0.0.0:12346" \\
       --max_keypoints 128 \\
       --use_fp16 \\
       --use_tensorrt \\
       --tensorrt_precision fp16

3. 第二次运行会直接加载编译好的模型（很快）

4. 性能对比：
   - PyTorch FP16: ~22 FPS
   - TensorRT FP16: ~35-45 FPS (预期)
   - TensorRT INT8: ~50-60 FPS (预期，但精度可能下降)

注意事项：
- 首次编译需要较长时间（5-15分钟）
- 编译后的模型会保存在当前目录（superpoint_fp16.ts, lightglue_fp16.ts）
- 如果模型结构改变，需要删除缓存文件重新编译
- INT8 量化可能需要校准数据
"""

if __name__ == "__main__":
    # Script entry point: print the integration snippet followed by the
    # usage notes, each under a heading framed by 60-character rules.
    rule = "=" * 60

    print(rule)
    print("TensorRT 集成指南")
    print(rule)
    print(integrate_tensorrt_into_demo())

    # The second heading is preceded by an extra blank line.
    print("\n" + rule)
    print("使用说明")
    print(rule)
    print(usage_instructions)