# yichael / AIStoryBoard
import sys
import os
from pathlib import Path
import cv2
import numpy as np
import json
import time

# Make sure both standard streams emit UTF-8 (this script prints non-ASCII text).
sys.stdout.reconfigure(encoding='utf-8')
sys.stderr.reconfigure(encoding='utf-8')

# Register the local OnnxOCR checkout on sys.path. It is looked up under
# python/OnnxOCR-main inside the third ancestor directory of this file.
project_root = Path(__file__).parent.parent.parent
onnxocr_path = project_root.joinpath('python', 'OnnxOCR-main')
if not onnxocr_path.exists():
    # Nothing can be imported without the checkout, so stop right away.
    print(f"[ERROR] 未找到本地OnnxOCR路径: {onnxocr_path}")
    sys.exit(1)
sys.path.insert(0, str(onnxocr_path))
print(f"[INFO] 使用本地OnnxOCR路径: {onnxocr_path}")

# Import the OCR wrapper class; exit with status 1 when the import fails.
try:
    from onnxocr.onnx_paddleocr import ONNXPaddleOcr
except ImportError as import_error:
    print(f"[ERROR] 无法导入OnnxOCR模块: {import_error}")
    sys.exit(1)
else:
    print("[INFO] OnnxOCR 导入成功")

# Number of regions printed per mode before the output is truncated.
_PREVIEW_LIMIT = 3


def _timed_call(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, print the elapsed time, return its result."""
    # perf_counter is a monotonic, high-resolution clock meant for intervals;
    # time.time() can jump when the system clock is adjusted.
    started = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed = time.perf_counter() - started
    print(f"⏱️ 耗时: {elapsed:.2f}秒")
    return result


def _print_remaining(total, shown=_PREVIEW_LIMIT):
    """Print the "... N more regions" line when more than ``shown`` exist."""
    if total > shown:
        print(f"  ... 还有 {total - shown} 个区域")


def _print_box_geometry(boxes, limit=_PREVIEW_LIMIT):
    """Print center and axis-aligned extent of the first ``limit`` boxes.

    Each box is converted with ``np.array`` and indexed as ``[:, 0]`` / ``[:, 1]``,
    i.e. it is treated as a sequence of (x, y) points.
    """
    for index, bbox in enumerate(boxes[:limit], start=1):
        points = np.array(bbox)
        xs, ys = points[:, 0], points[:, 1]
        center_x, center_y = np.mean(xs), np.mean(ys)
        width = np.max(xs) - np.min(xs)
        height = np.max(ys) - np.min(ys)
        print(f"  {index}. 中心({center_x:.0f},{center_y:.0f}) 尺寸({width:.0f}x{height:.0f})")
    _print_remaining(len(boxes), limit)


def test_different_ocr_modes(image_path):
    """Run OnnxOCR on one image in three modes and print a report for each.

    The modes are: full OCR (detection + recognition + angle classification),
    detection only through ``ocr()``, and a direct call to the model's
    ``text_detector``. For every mode the elapsed time and up to three
    detected regions are printed. The closing "performance summary" is static
    text; it is not derived from the measured timings.

    Args:
        image_path: Path of the image file to analyse (anything ``str()`` can
            turn into a filesystem path).

    Returns:
        None. All results are written to stdout. If the file cannot be read
        or decoded, an ``[ERROR]`` line is printed and the function returns
        early.
    """
    print(f"\n🧪 测试图片: {image_path}")

    # Load the raw bytes with NumPy and decode them in memory instead of using
    # cv2.imread -- presumably to cope with non-ASCII paths; confirm if changed.
    try:
        img_array = np.fromfile(str(image_path), dtype=np.uint8)
    except OSError as exc:
        # Missing/unreadable file: report it like any other unreadable image
        # instead of crashing with a traceback.
        print(f"[ERROR] 无法读取图片: {image_path} ({exc})")
        return

    # cv2.imdecode asserts on an empty buffer, so a zero-byte file must be
    # rejected before decoding.
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_array.size else None
    if img is None:
        print(f"[ERROR] 无法读取图片: {image_path}")
        return

    # shape is (height, width, ...); reverse the first two to print width x height.
    print(f"[INFO] 图片尺寸: {img.shape[:2][::-1]} (宽x高)")

    print("\n[INFO] 初始化OnnxOCR...")
    ocr_model = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)

    separator = "\n" + "=" * 60
    print(separator)

    # Mode 1: full OCR (detection + recognition + angle classification).
    print("🔍 模式1: 完整OCR（检测+识别+角度分类）")
    full_result = _timed_call(ocr_model.ocr, img, det=True, rec=True, cls=True)

    if full_result and full_result[0]:
        regions = full_result[0]
        print(f"📊 检测到 {len(regions)} 个文字区域")
        # Each entry unpacks as (bbox, (text, confidence)).
        for index, (_bbox, (text, confidence)) in enumerate(regions[:_PREVIEW_LIMIT], start=1):
            print(f"  {index}. '{text}' (置信度: {confidence:.3f})")
        _print_remaining(len(regions))

    print(separator)

    # Mode 2: detect text regions only, without recognising their content.
    print("🎯 模式2: 只检测文字区域（不识别文字内容）")
    detection_only = _timed_call(ocr_model.ocr, img, det=True, rec=False, cls=False)

    if detection_only and detection_only[0]:
        boxes = detection_only[0]
        print(f"📍 检测到 {len(boxes)} 个文字区域（仅坐标）")
        _print_box_geometry(boxes)

    print(separator)

    # Mode 3: call the underlying text detector directly.
    print("🔧 模式3: 直接调用文字检测器")
    detector_result = _timed_call(ocr_model.text_detector, img)

    if detector_result is not None and len(detector_result) > 0:
        print(f"🎪 检测到 {len(detector_result)} 个文字区域（原始检测器输出）")
        _print_box_geometry(detector_result)

    print(separator)

    # Static guidance text (not computed from the timings above).
    print("📈 性能对比总结:")
    print("  模式1 (完整OCR): 最慢，但提供完整的文字内容和坐标")
    print("  模式2 (仅检测): 较快，只提供文字区域坐标")
    print("  模式3 (检测器): 最快，提供原始检测结果")
    print("\n💡 推荐使用场景:")
    print("  - 需要文字内容: 使用模式1")
    print("  - 只需要区域位置: 使用模式2或3")
    print("  - 批量处理/实时应用: 使用模式2或3，然后选择性识别")

if __name__ == '__main__':
    # Script entry point: pick the image to analyse, then run the mode comparison.
    # The project root is recomputed here so this section stands on its own.
    project_root = Path(__file__).parent.parent.parent

    if len(sys.argv) < 2:
        # No argument given: use the first bundled sample image that is present.
        test_paths = [
            project_root / "static/漫画/image/鬼-巷第001卷/第一章/test/tmp/0004_鬼-巷第001卷_text_mask.png",
            project_root / "static/漫画/image/鬼-巷第001卷/第一章/0004_鬼-巷第001卷.jpeg",
        ]
        default_image = next((path for path in test_paths if path.is_file()), None)

        if default_image is None:
            print("Usage: python test_onnxocr_detection_modes.py <image_path>")
            print("No test image found. Please provide an image path.")
            sys.exit(1)

        image_path = str(default_image)
        print(f"[INFO] 使用默认测试图片: {default_image.name}")
    else:
        image_path = sys.argv[1]
        try:
            path_obj = Path(image_path)
            if not path_obj.is_file():
                # Not found as given: retry relative to the project root, and
                # only report an error when that lookup fails as well.
                path_obj = project_root / image_path
                if not path_obj.is_file():
                    print(f"[ERROR] 图片文件不存在: {image_path}")
                    sys.exit(1)
                print(f"[INFO] 使用相对路径: {path_obj}")
            image_path = str(path_obj)
        except (OSError, ValueError) as e:
            # Malformed paths (e.g. embedded NUL, OS-level lookup failures).
            print(f"[ERROR] 路径处理错误: {e}")
            sys.exit(1)

    test_different_ocr_modes(image_path)