"""Compare the detection/recognition modes exposed by a local OnnxOCR checkout.

Run as a script with an optional image path argument. Three modes are timed
against the same image: the full OCR pipeline, detection only via ``ocr()``,
and a direct call to the underlying text detector.
"""
import sys
import os
from pathlib import Path
import cv2
import numpy as np
import json
import time

# Force UTF-8 output so the Chinese text and emoji printed below are not
# mangled by a console that defaults to a legacy encoding.
for _stream in (sys.stdout, sys.stderr):
    # A stream replaced by a host (capture, redirection) may not offer
    # reconfigure(); skip it instead of crashing before any work is done.
    if hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding='utf-8')

# Make the bundled OnnxOCR checkout importable.
project_root = Path(__file__).parent.parent.parent
onnxocr_path = project_root / 'python' / 'OnnxOCR-main'

if onnxocr_path.exists():
    sys.path.insert(0, str(onnxocr_path))
    print(f"[INFO] 使用本地OnnxOCR路径: {onnxocr_path}")
else:
    print(f"[ERROR] 未找到本地OnnxOCR路径: {onnxocr_path}")
    sys.exit(1)

try:
    from onnxocr.onnx_paddleocr import ONNXPaddleOcr
except ImportError as e:
    print(f"[ERROR] 无法导入OnnxOCR模块: {e}")
    sys.exit(1)
else:
    print("[INFO] OnnxOCR 导入成功")

# Number of regions listed per mode before the output is truncated.
_PREVIEW_LIMIT = 3


def _run_timed(func, *args, **kwargs):
    """Call ``func`` and print how long it took; return its result."""
    started = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed = time.perf_counter() - started
    print(f"⏱️ 耗时: {elapsed:.2f}秒")
    return result


def _print_remaining(total):
    """Print how many regions were left out of the preview, if any."""
    if total > _PREVIEW_LIMIT:
        print(f" ... 还有 {total - _PREVIEW_LIMIT} 个区域")


def _print_box_previews(boxes):
    """Print centre and axis-aligned extent of the first few boxes."""
    for index, bbox in enumerate(boxes[:_PREVIEW_LIMIT], start=1):
        points = np.array(bbox)
        xs = points[:, 0]
        ys = points[:, 1]
        center_x = np.mean(xs)
        center_y = np.mean(ys)
        width = np.max(xs) - np.min(xs)
        height = np.max(ys) - np.min(ys)
        print(f" {index}. 中心({center_x:.0f},{center_y:.0f}) 尺寸({width:.0f}x{height:.0f})")
    _print_remaining(len(boxes))


def test_different_ocr_modes(image_path):
    """Run and time the three OnnxOCR modes on one image, printing a report.

    Args:
        image_path: Path (str or Path) of the image to analyse.

    Returns:
        None. If the image cannot be decoded an error line is printed and the
        function returns early without initialising the OCR model.
    """
    print(f"\n🧪 测试图片: {image_path}")

    # Read the bytes with NumPy and decode in memory rather than handing the
    # path to OpenCV -- presumably so non-ASCII paths work; TODO confirm.
    img_array = np.fromfile(str(image_path), dtype=np.uint8)
    # OpenCV can raise on an empty buffer instead of returning None, so a
    # zero-byte file is routed to the same "unreadable" branch.
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_array.size else None
    if img is None:
        print(f"[ERROR] 无法读取图片: {image_path}")
        return

    print(f"[INFO] 图片尺寸: {img.shape[:2][::-1]} (宽x高)")

    print("\n[INFO] 初始化OnnxOCR...")
    ocr_model = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)

    separator = "\n" + "=" * 60

    # Mode 1: full pipeline (detection + recognition + angle classification).
    print(separator)
    print("🔍 模式1: 完整OCR(检测+识别+角度分类)")
    full_result = _run_timed(ocr_model.ocr, img, det=True, rec=True, cls=True)
    if full_result and full_result[0]:
        regions = full_result[0]
        print(f"📊 检测到 {len(regions)} 个文字区域")
        preview = enumerate(regions[:_PREVIEW_LIMIT], start=1)
        for index, (_bbox, (text, confidence)) in preview:
            print(f" {index}. '{text}' (置信度: {confidence:.3f})")
        _print_remaining(len(regions))

    # Mode 2: detection only through ocr() -- coordinates, no text content.
    print(separator)
    print("🎯 模式2: 只检测文字区域(不识别文字内容)")
    detection_only = _run_timed(ocr_model.ocr, img, det=True, rec=False, cls=False)
    if detection_only and detection_only[0]:
        print(f"📍 检测到 {len(detection_only[0])} 个文字区域(仅坐标)")
        _print_box_previews(detection_only[0])

    # Mode 3: call the text detector directly, bypassing ocr().
    print(separator)
    print("🔧 模式3: 直接调用文字检测器")
    detector_result = _run_timed(ocr_model.text_detector, img)
    # Explicit None/len checks: the result may be an array, whose truth value
    # is ambiguous.
    if detector_result is not None and len(detector_result) > 0:
        print(f"🎪 检测到 {len(detector_result)} 个文字区域(原始检测器输出)")
        _print_box_previews(detector_result)

    # Static guidance text; it is not derived from the timings measured above.
    print(separator)
    print("📈 性能对比总结:")
    print(" 模式1 (完整OCR): 最慢,但提供完整的文字内容和坐标")
    print(" 模式2 (仅检测): 较快,只提供文字区域坐标")
    print(" 模式3 (检测器): 最快,提供原始检测结果")

    print("\n💡 推荐使用场景:")
    print(" - 需要文字内容: 使用模式1")
    print(" - 只需要区域位置: 使用模式2或3")
    print(" - 批量处理/实时应用: 使用模式2或3,然后选择性识别")


def _find_default_image():
    """Return the first bundled sample image that exists, or None."""
    candidates = [
        project_root / "static/漫画/image/鬼-巷第001卷/第一章/test/tmp/0004_鬼-巷第001卷_text_mask.png",
        project_root / "static/漫画/image/鬼-巷第001卷/第一章/0004_鬼-巷第001卷.jpeg",
    ]
    found = next((path for path in candidates if path.exists()), None)
    if found is None:
        return None
    print(f"[INFO] 使用默认测试图片: {found.name}")
    return str(found)


def _resolve_image_path(raw_path):
    """Resolve ``raw_path`` as given, then relative to the project root.

    Returns the usable path as a string, or None when neither location exists.
    """
    direct = Path(raw_path)
    if direct.exists():
        return str(direct)
    fallback = project_root / raw_path
    if fallback.exists():
        print(f"[INFO] 使用相对路径: {fallback}")
        return str(fallback)
    return None


def main(argv=None):
    """Script entry point: pick an image, then run the mode comparison.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Exits with status 1 when no usable image can be located.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if not args:
        # No argument given: fall back to a bundled sample image.
        image_path = _find_default_image()
        if image_path is None:
            print("Usage: python test_onnxocr_detection_modes.py <image_path>")
            print("No test image found. Please provide an image path.")
            sys.exit(1)
    else:
        requested = args[0]
        try:
            image_path = _resolve_image_path(requested)
        except (OSError, ValueError) as e:
            print(f"[ERROR] 路径处理错误: {e}")
            sys.exit(1)
        # Report "not found" only after both lookups failed, so a successful
        # project-relative fallback no longer logs a spurious error.
        if image_path is None:
            print(f"[ERROR] 图片文件不存在: {requested}")
            sys.exit(1)

    test_different_ocr_modes(image_path)


if __name__ == '__main__':
    main()