yichael
/
AIStoryBoard


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
							#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
使用OnnxOCR进行OCR识别的脚本
替代PaddleOCR，解决崩溃问题
"""

import os
import sys
import json
import cv2
import numpy as np
from pathlib import Path
import warnings

# Suppress every warning category for the whole process (presumably to keep
# the script's own [INFO]/[WARN]/[ERROR] log lines free of library noise).
warnings.filterwarnings('ignore')

def _parse_ocr_config(config_json):
    """Parse the user-supplied JSON config string into a dict of overrides.

    Falls back to an empty dict (i.e. "use the defaults") when the string is
    empty, is not valid JSON, or decodes to something other than a JSON
    object, logging a warning in the latter two cases.
    """
    if not config_json or config_json == '{}':
        config = {}
    else:
        try:
            config = json.loads(config_json)
        except json.JSONDecodeError as e:
            print(f"[WARN] 配置参数解析失败: {e}，使用默认配置")
            return {}
        # Valid JSON such as "[1, 2]" or "5" cannot be merged into the keyword
        # arguments below; treat it like malformed input instead of crashing.
        if not isinstance(config, dict):
            print(
                f"[WARN] 配置参数不是JSON对象（实际类型: {type(config).__name__}），使用默认配置"
            )
            return {}

    print(f"[INFO] 接收到配置参数: {len(config)} 个参数")
    if config:
        print(f"[INFO] 配置详情: {config}")
    return config


def _read_image(image_path):
    """Load an image from disk via np.fromfile + cv2.imdecode.

    Reading the raw bytes first (instead of handing the path to OpenCV) is
    what the original code does to cope with non-ASCII paths.

    Raises:
        ValueError: if the file is empty or cannot be decoded as an image.
    """
    img_array = np.fromfile(str(image_path), dtype=np.uint8)
    # An empty buffer makes cv2.imdecode fail with an opaque assertion error;
    # report it with the same message as any other unreadable image.
    if img_array.size == 0:
        raise ValueError(f"无法读取图片: {image_path}")

    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError(f"无法读取图片: {image_path}")
    return img


def ocr_with_onnxocr(image_path, text_mask_path, output_dir, config_json="{}"):
    """
    Run OCR on an image with OnnxOCR and save the result as JSON.

    On success the result is written to ``<output_dir>/<image stem>_dialogues.json``.
    When no text is detected, nothing is written and an empty result is returned.

    Args:
        image_path: Path of the input image.
        text_mask_path: Path of a text-mask image (may be empty). Currently
            unused by this function; kept for interface compatibility.
        output_dir: Directory for the output JSON; created if missing.
        config_json: JSON object string of ONNXPaddleOcr keyword overrides.
            Invalid or non-object JSON is ignored with a warning.

    Returns:
        dict: ``dialogues`` (list of ``{"bbox", "text", "confidence"}``),
        ``total_dialogues``, ``image_path`` and ``ocr_engine``.

    Raises:
        ValueError: if the image cannot be read or decoded.
        Exception: any other failure (import error, OCR error, I/O error) is
            logged with a traceback and re-raised unchanged.
    """
    try:
        # Work with Path objects for more reliable handling of non-ASCII paths.
        image_path = Path(image_path)
        output_dir = Path(output_dir)

        print("[INFO] 开始OCR识别...")
        print(f"[INFO] 输入图片: {image_path}")
        print(f"[INFO] 输出目录: {output_dir}")

        # Make sure the output directory exists.
        output_dir.mkdir(parents=True, exist_ok=True)

        # Prefer a bundled OnnxOCR checkout next to this script's parent dir.
        onnxocr_path = Path(__file__).parent.parent / 'OnnxOCR-main'
        if onnxocr_path.exists():
            # Avoid stacking duplicate entries when called more than once.
            if str(onnxocr_path) not in sys.path:
                sys.path.insert(0, str(onnxocr_path))
            print(f"[INFO] 使用本地OnnxOCR路径: {onnxocr_path}")
        else:
            print("[WARN] 未找到本地OnnxOCR，尝试使用已安装的版本")

        from onnxocr.onnx_paddleocr import ONNXPaddleOcr

        # Parse user overrides (falls back to {} on bad input).
        config = _parse_ocr_config(config_json)

        # High-recall defaults; per-key notes carried over from the original.
        default_config = {
            "use_angle_cls": True,           # enable the angle classifier
            "use_gpu": False,                # run on CPU
            "det_db_thresh": 0.2,            # lower detection threshold (more sensitive)
            "det_db_box_thresh": 0.5,        # lower box-confidence threshold (detect more)
            "det_limit_side_len": 1280,      # larger processing size (higher precision)
            "drop_score": 0.3,               # lower recognition threshold (keep more results)
            "rec_image_shape": "3, 48, 320"  # image shape compatible with the model
        }

        # User-supplied values override the defaults.
        final_config = {**default_config, **config}

        print("[INFO] 初始化OnnxOCR...")
        print(f"[INFO] 最终配置: {final_config}")

        ocr_model = ONNXPaddleOcr(**final_config)
        print("[INFO] OnnxOCR 初始化成功")

        # Echo the parameters that matter most for detection quality.
        key_params = ['det_db_thresh', 'det_db_box_thresh', 'det_limit_side_len', 'drop_score', 'use_angle_cls']
        config_summary = {k: final_config.get(k, 'default') for k in key_params}
        print(f"[INFO] 关键参数: {config_summary}")

        print(f"[INFO] 读取图片: {image_path}")
        img = _read_image(image_path)
        print(f"[INFO] 图片读取成功，尺寸: {img.shape}")

        # Run detection + recognition with the angle classifier explicitly on.
        print("[INFO] 正在进行OCR识别（启用角度分类器）...")
        ocr_result = ocr_model.ocr(img, det=True, rec=True, cls=True)

        if not ocr_result or not ocr_result[0]:
            print("[WARN] 未检测到文字")
            # Same keys as the normal result so callers can treat both alike.
            return {
                "dialogues": [],
                "total_dialogues": 0,
                "image_path": str(image_path),
                "ocr_engine": "OnnxOCR",
            }

        # Convert to the format used by the earlier PaddleOCR-based script.
        # Each detection unpacks as (bbox, (text, confidence)); bbox is the
        # four-corner box [[x1,y1], [x2,y2], [x3,y3], [x4,y4]].
        dialogues = [
            {"bbox": bbox, "text": text, "confidence": float(confidence)}
            for bbox, (text, confidence) in ocr_result[0]
        ]

        print(f"[INFO] OCR识别完成，检测到 {len(dialogues)} 个文字块")

        result_data = {
            "dialogues": dialogues,
            "total_dialogues": len(dialogues),
            "image_path": str(image_path),
            "ocr_engine": "OnnxOCR"
        }

        # Output file name follows the earlier PaddleOCR script's convention.
        image_name = image_path.stem
        json_output_path = output_dir / f"{image_name}_dialogues.json"

        with open(json_output_path, 'w', encoding='utf-8') as f:
            json.dump(result_data, f, ensure_ascii=False, indent=2)

        print(f"[INFO] OCR结果已保存到: {json_output_path}")

        # Preview the first five results for debugging.
        print("[INFO] 识别结果预览:")
        for i, dialogue in enumerate(dialogues[:5]):
            print(f"  {i+1}. {dialogue['text']} (置信度: {dialogue['confidence']:.3f})")

        if len(dialogues) > 5:
            print(f"  ... 还有 {len(dialogues) - 5} 个文字块")

        return result_data

    except Exception as e:
        # Log with a full traceback, then let the caller decide what to do.
        print(f"[ERROR] OCR识别失败: {e}")
        import traceback
        traceback.print_exc()
        raise


def main():
    """Command-line entry point.

    Expects ``<image_path> <text_mask_path> [output_dir] [config_json]`` on the
    command line. Prints usage and exits with status 1 when fewer than two
    arguments are given; otherwise runs the OCR and exits with 0 on success
    or 1 if any exception is raised.
    """
    args = sys.argv[1:]

    # At least the image path and the (possibly empty) mask path are required.
    if len(args) < 2:
        usage_lines = (
            "用法: python ocr_with_onnxocr.py <image_path> <text_mask_path> <output_dir> [config_json]",
            "示例:",
            '  python ocr_with_onnxocr.py "image.png" "" "output/" \'{"det_db_thresh":0.1}\'',
        )
        for line in usage_lines:
            print(line)
        sys.exit(1)

    image_path = args[0]
    text_mask_path = args[1] or ""
    # Optional trailing arguments fall back to their defaults.
    output_dir = args[2] if len(args) > 2 else "."
    config_json = args[3] if len(args) > 3 else "{}"

    print("[DEBUG] 开始处理OCR任务...")

    try:
        result = ocr_with_onnxocr(image_path, text_mask_path, output_dir, config_json)
        print(f"[SUCCESS] OCR识别完成，共识别 {result['total_dialogues']} 个文字块")
    except Exception as e:
        print(f"[ERROR] 程序执行失败: {e}")
        sys.exit(1)
    else:
        sys.exit(0)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()