| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170 |
- import sys
- import os
- from pathlib import Path
- import cv2
- import numpy as np
- import json
- import time
# Switch both standard streams to UTF-8 so the non-ASCII log messages below
# are emitted with a known encoding.
for _stream in (sys.stdout, sys.stderr):
    _stream.reconfigure(encoding='utf-8')

# The project root is the third ancestor of this file's path; the local
# OnnxOCR checkout is expected at <project_root>/python/OnnxOCR-main.
project_root = Path(__file__).parent.parent.parent
onnxocr_path = project_root / 'python' / 'OnnxOCR-main'
if not onnxocr_path.exists():
    print(f"[ERROR] 未找到本地OnnxOCR路径: {onnxocr_path}")
    sys.exit(1)

# Put the local checkout first on sys.path so the import below resolves to it.
sys.path.insert(0, str(onnxocr_path))
print(f"[INFO] 使用本地OnnxOCR路径: {onnxocr_path}")

try:
    from onnxocr.onnx_paddleocr import ONNXPaddleOcr
except ImportError as e:
    print(f"[ERROR] 无法导入OnnxOCR模块: {e}")
    sys.exit(1)
else:
    print("[INFO] OnnxOCR 导入成功")
def _print_box_summary(boxes, limit=3):
    """Print the center and axis-aligned extent of the first ``limit`` boxes.

    Each box is converted with ``np.array`` and indexed as ``[:, 0]`` (x) and
    ``[:, 1]`` (y), so it must be a sequence of 2-D points. When more than
    ``limit`` boxes exist, a trailing line reports how many were omitted.
    """
    for index, bbox in enumerate(boxes[:limit], start=1):
        points = np.array(bbox)
        xs, ys = points[:, 0], points[:, 1]
        center_x, center_y = np.mean(xs), np.mean(ys)
        width = np.max(xs) - np.min(xs)
        height = np.max(ys) - np.min(ys)
        print(f" {index}. 中心({center_x:.0f},{center_y:.0f}) 尺寸({width:.0f}x{height:.0f})")
    omitted = len(boxes) - limit
    if omitted > 0:
        print(f" ... 还有 {omitted} 个区域")


def test_different_ocr_modes(image_path):
    """Run three OnnxOCR invocation modes on one image and print a comparison.

    The image is loaded through ``np.fromfile`` + ``cv2.imdecode`` rather than
    by handing the path to OpenCV, then processed three ways:

    1. ``ocr(det=True, rec=True, cls=True)`` - detection + recognition + angle
       classification;
    2. ``ocr(det=True, rec=False, cls=False)`` - detection only;
    3. ``text_detector(img)`` - the detector called directly.

    For each mode the elapsed time and a preview of up to three regions are
    printed, followed by a fixed summary of recommended usage.

    Args:
        image_path: Path of the image to analyse (``str`` or path-like).

    Returns:
        None. If the file cannot be read or decoded, an ``[ERROR]`` line is
        printed and the function returns before the OCR model is created.
    """
    print(f"\n🧪 测试图片: {image_path}")

    # Read the raw bytes first. A missing/unreadable file raises OSError here;
    # route it to the same error path as an undecodable image instead of
    # letting the traceback escape.
    try:
        img_array = np.fromfile(str(image_path), dtype=np.uint8)
    except OSError as e:
        print(f"[ERROR] 无法读取图片: {image_path} ({e})")
        return

    # An empty buffer cannot be a valid image; skip the decoder for it so the
    # zero-byte-file case also ends up in the "cannot read" branch below.
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_array.size else None

    if img is None:
        print(f"[ERROR] 无法读取图片: {image_path}")
        return

    # shape[:2] is (height, width); reverse it to report width x height.
    print(f"[INFO] 图片尺寸: {img.shape[:2][::-1]} (宽x高)")

    # Initialise OnnxOCR (CPU, with the angle classifier enabled).
    print("\n[INFO] 初始化OnnxOCR...")
    ocr_model = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)

    separator = "\n" + "=" * 60
    print(separator)

    # Mode 1: full OCR (detection + recognition + angle classification).
    print("🔍 模式1: 完整OCR(检测+识别+角度分类)")
    started = time.perf_counter()
    full_result = ocr_model.ocr(img, det=True, rec=True, cls=True)
    elapsed = time.perf_counter() - started
    print(f"⏱️ 耗时: {elapsed:.2f}秒")

    if full_result and full_result[0]:
        regions = full_result[0]
        print(f"📊 检测到 {len(regions)} 个文字区域")
        # Each entry unpacks as (bbox, (text, confidence)); preview three.
        for index, (_bbox, (text, confidence)) in enumerate(regions[:3], start=1):
            print(f" {index}. '{text}' (置信度: {confidence:.3f})")
        if len(regions) > 3:
            print(f" ... 还有 {len(regions) - 3} 个区域")

    print(separator)

    # Mode 2: detect text regions only (no recognition of their content).
    print("🎯 模式2: 只检测文字区域(不识别文字内容)")
    started = time.perf_counter()
    detection_only = ocr_model.ocr(img, det=True, rec=False, cls=False)
    elapsed = time.perf_counter() - started
    print(f"⏱️ 耗时: {elapsed:.2f}秒")

    if detection_only and detection_only[0]:
        print(f"📍 检测到 {len(detection_only[0])} 个文字区域(仅坐标)")
        _print_box_summary(detection_only[0])

    print(separator)

    # Mode 3: call the text detector directly.
    print("🔧 模式3: 直接调用文字检测器")
    started = time.perf_counter()
    detector_result = ocr_model.text_detector(img)
    elapsed = time.perf_counter() - started
    print(f"⏱️ 耗时: {elapsed:.2f}秒")

    # Use an explicit None/len test: the result is not relied on to have
    # list-style truthiness.
    if detector_result is not None and len(detector_result) > 0:
        print(f"🎪 检测到 {len(detector_result)} 个文字区域(原始检测器输出)")
        _print_box_summary(detector_result)

    print(separator)

    # Fixed performance comparison summary and usage recommendations.
    print("📈 性能对比总结:")
    print(" 模式1 (完整OCR): 最慢,但提供完整的文字内容和坐标")
    print(" 模式2 (仅检测): 较快,只提供文字区域坐标")
    print(" 模式3 (检测器): 最快,提供原始检测结果")
    print("\n💡 推荐使用场景:")
    print(" - 需要文字内容: 使用模式1")
    print(" - 只需要区域位置: 使用模式2或3")
    print(" - 批量处理/实时应用: 使用模式2或3,然后选择性识别")


# This is a script entry point that needs an explicit image path, not a pytest
# test; opt it out of pytest collection, which would otherwise pick it up by
# its ``test_`` prefix and fail looking for an ``image_path`` fixture.
test_different_ocr_modes.__test__ = False
def _find_default_image(root):
    """Return the first bundled sample image that exists under ``root``, else None."""
    candidates = [
        root / "static/漫画/image/鬼-巷第001卷/第一章/test/tmp/0004_鬼-巷第001卷_text_mask.png",
        root / "static/漫画/image/鬼-巷第001卷/第一章/0004_鬼-巷第001卷.jpeg",
    ]
    return next((candidate for candidate in candidates if candidate.exists()), None)


def _resolve_image_path(raw_path, root):
    """Return an existing ``Path`` for ``raw_path``, or None if none is found.

    The path is tried as given first, then relative to ``root``. The
    ``[INFO]`` line is printed only when the project-relative fallback is
    the one that succeeds.
    """
    as_given = Path(raw_path)
    if as_given.exists():
        return as_given
    under_root = root / raw_path
    if under_root.exists():
        print(f"[INFO] 使用相对路径: {under_root}")
        return under_root
    return None


if __name__ == '__main__':
    # Same project-root derivation the rest of the script uses (third ancestor
    # of this file's path); computed once for both branches below.
    project_root = Path(__file__).parent.parent.parent

    if len(sys.argv) < 2:
        # No argument given: fall back to a bundled sample image if one exists.
        default_image = _find_default_image(project_root)
        if default_image is None:
            print("Usage: python test_onnxocr_detection_modes.py <image_path>")
            print("No test image found. Please provide an image path.")
            sys.exit(1)
        print(f"[INFO] 使用默认测试图片: {default_image.name}")
        image_path = str(default_image)
    else:
        requested = sys.argv[1]
        # Go through Path objects to sidestep Windows path-encoding problems.
        # Broad catch is deliberate at this top-level boundary: report and exit.
        try:
            resolved = _resolve_image_path(requested, project_root)
        except Exception as e:
            print(f"[ERROR] 路径处理错误: {e}")
            sys.exit(1)
        # Report "file not found" only after BOTH lookups have failed, so a
        # successful project-relative fallback no longer logs a spurious error.
        if resolved is None:
            print(f"[ERROR] 图片文件不存在: {requested}")
            sys.exit(1)
        image_path = str(resolved)

    test_different_ocr_modes(image_path)
|