#!/usr/bin/env python3
"""Smoke-test whether Torch-TensorRT INT8 compilation works on this machine.

The script checks that CUDA and torch-tensorrt are available, builds a tiny
two-layer convolutional model, compiles it with FP32 + INT8 precisions
enabled, and runs a single forward pass through the compiled module.

Exit status is 0 when every step succeeds and 1 otherwise.
"""

import sys
import traceback

import torch

# Horizontal rule used to frame the sections of the console report.
SEPARATOR = "=" * 60

# (batch, channels, height, width) of the random tensor fed to the model.
INPUT_SHAPE = (1, 1, 480, 640)


class SimpleModel(torch.nn.Module):
    """Minimal conv -> relu -> pool -> conv -> relu network used as a probe."""

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = torch.nn.Conv2d(32, 64, 3, padding=1)
        self.pool = torch.nn.MaxPool2d(2)

    def forward(self, x):
        """Return the 64-channel feature map at half the input resolution."""
        x = torch.relu(self.conv1(x))
        x = self.pool(x)
        x = torch.relu(self.conv2(x))
        return x


def _import_torch_tensorrt():
    """Return the ``torch_tensorrt`` module, or ``None`` if not installed."""
    try:
        import torch_tensorrt
    except ImportError:
        return None
    return torch_tensorrt


def _report_failure(stage, exc):
    """Print the failure report for *stage* ("compilation" or "inference").

    Must be called from inside the ``except`` block that caught *exc*, because
    ``traceback.print_exc()`` prints the exception currently being handled.
    """
    print(f"\n[ERROR] INT8 {stage} failed: {exc}")
    print(SEPARATOR)
    # Flush so the stdout report stays ahead of the stderr traceback.
    print("Full traceback:", flush=True)
    traceback.print_exc()
    print("\nRecommendation: Use FP16 instead of INT8")


def main():
    """Run the INT8 smoke test and return the process exit code (0 or 1)."""
    print("Testing INT8 compilation...")
    print(SEPARATOR)

    # Check CUDA.
    if not torch.cuda.is_available():
        print("[ERROR] CUDA not available")
        return 1
    print(f"CUDA available: {torch.cuda.is_available()}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")

    # Check TensorRT.
    torch_tensorrt = _import_torch_tensorrt()
    if torch_tensorrt is None:
        print("[ERROR] torch-tensorrt not installed")
        return 1
    print(f"torch-tensorrt version: {torch_tensorrt.__version__}")

    # Create a simple model to exercise INT8 compilation.
    print("\nCreating test model...")
    model = SimpleModel().eval().cuda()
    example_input = torch.randn(*INPUT_SHAPE).cuda()
    print("Model created successfully")
    print(f"Input shape: {example_input.shape}")

    # Test INT8 compilation.
    print("\n" + SEPARATOR)
    print("Attempting INT8 compilation...")
    print("WARNING: This may take 5-10 minutes")
    # Flush before the long-running build so the banner is visible right away.
    print(SEPARATOR, flush=True)

    try:
        print("Starting compilation...", flush=True)
        # NOTE(review): no calibrator or dynamic ranges are supplied here, so
        # this presumably only proves that an INT8-enabled engine can be
        # built, not that its numerical accuracy is acceptable -- confirm
        # against the Torch-TensorRT PTQ documentation.
        trt_model = torch_tensorrt.compile(
            model,
            inputs=[example_input],
            enabled_precisions={torch.float, torch.int8},
            workspace_size=1 << 30,  # 1GB
            min_block_size=7,
            ir="torchscript",
        )
    except Exception as exc:  # top-level boundary: report and exit non-zero
        _report_failure("compilation", exc)
        return 1

    print("\n[OK] INT8 compilation successful!")
    print(SEPARATOR)

    # Test inference separately so a runtime failure is not misreported as a
    # compilation failure.
    print("Testing inference...")
    try:
        with torch.no_grad():
            output = trt_model(example_input)
        # CUDA work is asynchronous; synchronize so that any execution error
        # surfaces inside this try block instead of at some later call.
        torch.cuda.synchronize()
    except Exception as exc:  # top-level boundary: report and exit non-zero
        _report_failure("inference", exc)
        return 1
    print(f"Output shape: {output.shape}")
    print("[OK] Inference successful!")

    print("\n" + SEPARATOR)
    print("INT8 compilation test PASSED!")
    print("You can use --tensorrt_precision int8 in your demo script")
    print(SEPARATOR)
    return 0


if __name__ == "__main__":
    sys.exit(main())