| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 使用OnnxOCR进行OCR识别的脚本
- 替代PaddleOCR,解决崩溃问题
- """
- import os
- import sys
- import json
- import cv2
- import numpy as np
- from pathlib import Path
- import warnings
# Silence all Python warnings for the lifetime of this process.
warnings.filterwarnings('ignore')
def _json_default(obj):
    """``json.dump`` fallback: turn NumPy arrays/scalars into plain Python values."""
    if isinstance(obj, (np.ndarray, np.generic)):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def _load_user_config(config_json):
    """Decode the user-supplied overrides into a dict; log and return ``{}`` if unusable."""
    if isinstance(config_json, dict):
        # Already decoded by the caller; copy so the caller's dict is never mutated.
        config = dict(config_json)
    elif not config_json or config_json == '{}':
        config = {}
    else:
        try:
            config = json.loads(config_json)
            if not isinstance(config, dict):
                # Valid JSON but not an object (e.g. "[1, 2]" or "5"): it cannot be
                # merged into keyword arguments, so treat it like a parse failure.
                raise ValueError(
                    f"expected a JSON object, got {type(config).__name__}"
                )
        except (ValueError, TypeError) as e:  # JSONDecodeError is a ValueError
            print(f"[WARN] 配置参数解析失败: {e},使用默认配置")
            return {}

    print(f"[INFO] 接收到配置参数: {len(config)} 个参数")
    if config:
        print(f"[INFO] 配置详情: {config}")
    return config


def ocr_with_onnxocr(image_path, text_mask_path, output_dir, config_json="{}"):
    """Run OnnxOCR on one image and save the recognised text blocks as JSON.

    Args:
        image_path: Path of the input image (``str`` or ``Path``). The file is
            read with ``np.fromfile`` + ``cv2.imdecode`` so that non-ASCII
            (e.g. Chinese) paths work.
        text_mask_path: Path of a text-mask image (may be empty). Accepted for
            interface compatibility; this function does not use it.
        output_dir: Directory that receives ``<image stem>_dialogues.json``.
            Created (including parents) if it does not exist.
        config_json: JSON object string (a ``dict`` is also accepted) whose
            entries override the default ``ONNXPaddleOcr`` keyword arguments.
            Malformed or non-object input is logged and ignored.

    Returns:
        dict: ``{"dialogues": [...], "total_dialogues": int, "image_path": str,
        "ocr_engine": "OnnxOCR"}``, where each dialogue has ``bbox``, ``text``
        and ``confidence``. When no text is detected, only ``dialogues`` (empty)
        and ``total_dialogues`` (0) are returned and no JSON file is written.

    Raises:
        ValueError: If the image file is empty or cannot be decoded.
        Exception: Any other failure (missing OnnxOCR package, I/O error, ...)
            is logged with a traceback and re-raised unchanged.
    """
    try:
        # Path objects handle non-ASCII paths more reliably than raw strings.
        image_path = Path(image_path)
        output_dir = Path(output_dir)

        print("[INFO] 开始OCR识别...")
        print(f"[INFO] 输入图片: {image_path}")
        print(f"[INFO] 输出目录: {output_dir}")

        output_dir.mkdir(parents=True, exist_ok=True)

        # Prefer a vendored OnnxOCR checkout next to the project, if present.
        onnxocr_path = Path(__file__).parent.parent / 'OnnxOCR-main'
        if onnxocr_path.exists():
            onnxocr_dir = str(onnxocr_path)
            # Guard against growing sys.path on every call.
            if onnxocr_dir not in sys.path:
                sys.path.insert(0, onnxocr_dir)
            print(f"[INFO] 使用本地OnnxOCR路径: {onnxocr_path}")
        else:
            print("[WARN] 未找到本地OnnxOCR,尝试使用已安装的版本")

        from onnxocr.onnx_paddleocr import ONNXPaddleOcr

        config = _load_user_config(config_json)

        # High-recall defaults; user-supplied values take precedence.
        default_config = {
            "use_angle_cls": True,           # enable the angle classifier
            "use_gpu": False,                # run on CPU
            "det_db_thresh": 0.2,            # lower detection threshold (more sensitive)
            "det_db_box_thresh": 0.5,        # lower box-confidence threshold (more boxes)
            "det_limit_side_len": 1280,      # larger processing size (higher precision)
            "drop_score": 0.3,               # lower recognition cut-off (keep more results)
            "rec_image_shape": "3, 48, 320"  # input shape the recognition model expects
        }
        final_config = {**default_config, **config}

        print("[INFO] 初始化OnnxOCR...")
        print(f"[INFO] 最终配置: {final_config}")

        ocr_model = ONNXPaddleOcr(**final_config)
        print("[INFO] OnnxOCR 初始化成功")

        key_params = ['det_db_thresh', 'det_db_box_thresh', 'det_limit_side_len', 'drop_score', 'use_angle_cls']
        config_summary = {k: final_config.get(k, 'default') for k in key_params}
        print(f"[INFO] 关键参数: {config_summary}")

        print(f"[INFO] 读取图片: {image_path}")

        # Read raw bytes and decode in memory instead of cv2.imread, so that
        # non-ASCII paths work.
        img_array = np.fromfile(str(image_path), dtype=np.uint8)
        if img_array.size == 0:
            # An empty buffer would make cv2.imdecode fail with an opaque
            # assertion error; report it the same way as an undecodable file.
            raise ValueError(f"无法读取图片: {image_path}")
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

        if img is None:
            raise ValueError(f"无法读取图片: {image_path}")

        print(f"[INFO] 图片读取成功,尺寸: {img.shape}")

        print("[INFO] 正在进行OCR识别(启用角度分类器)...")
        ocr_result = ocr_model.ocr(img, det=True, rec=True, cls=True)

        if not ocr_result or not ocr_result[0]:
            # NOTE(review): no JSON file is written on this path, so a file left
            # by an earlier run for the same image stays in place — confirm that
            # callers rely on the return value rather than the file here.
            print("[WARN] 未检测到文字")
            return {"dialogues": [], "total_dialogues": 0}

        # Each detection is (bbox, (text, confidence)); bbox is the four corner
        # points [[x1, y1], [x2, y2], [x3, y3], [x4, y4]].
        dialogues = [
            {"bbox": bbox, "text": text, "confidence": float(confidence)}
            for bbox, (text, confidence) in ocr_result[0]
        ]

        print(f"[INFO] OCR识别完成,检测到 {len(dialogues)} 个文字块")

        result_data = {
            "dialogues": dialogues,
            "total_dialogues": len(dialogues),
            "image_path": str(image_path),
            "ocr_engine": "OnnxOCR"
        }

        # File name kept compatible with the earlier PaddleOCR-based script.
        image_name = image_path.stem
        json_output_path = output_dir / f"{image_name}_dialogues.json"

        with open(json_output_path, 'w', encoding='utf-8') as f:
            # default= only kicks in for values json cannot encode natively
            # (in case the engine hands back NumPy arrays/scalars in bbox).
            json.dump(result_data, f, ensure_ascii=False, indent=2, default=_json_default)

        print(f"[INFO] OCR结果已保存到: {json_output_path}")

        # Short preview (first five entries) to help with debugging.
        print("[INFO] 识别结果预览:")
        for i, dialogue in enumerate(dialogues[:5], start=1):
            print(f" {i}. {dialogue['text']} (置信度: {dialogue['confidence']:.3f})")

        if len(dialogues) > 5:
            print(f" ... 还有 {len(dialogues) - 5} 个文字块")

        return result_data

    except Exception as e:
        # Log with full traceback, then let the caller decide how to react.
        print(f"[ERROR] OCR识别失败: {e}")
        import traceback
        traceback.print_exc()
        raise
def main():
    """Command-line entry point: parse ``sys.argv`` and run one OCR job.

    Expected arguments: ``<image_path> <text_mask_path> [output_dir] [config_json]``.
    ``output_dir`` defaults to ``"."`` and ``config_json`` to ``"{}"``.
    Exits with status 1 after printing usage when fewer than two arguments
    are given, with status 0 when OCR succeeds, and with status 1 when the
    OCR call raises.
    """
    cli_args = sys.argv[1:]

    # Guard clause: image path and text-mask path are both mandatory.
    if len(cli_args) < 2:
        usage_lines = (
            "用法: python ocr_with_onnxocr.py <image_path> <text_mask_path> <output_dir> [config_json]",
            "示例:",
            ' python ocr_with_onnxocr.py "image.png" "" "output/" \'{"det_db_thresh":0.1}\'',
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    image_path = cli_args[0]
    text_mask_path = cli_args[1] or ""
    output_dir = cli_args[2] if len(cli_args) >= 3 else "."
    config_json = cli_args[3] if len(cli_args) >= 4 else "{}"

    print("[DEBUG] 开始处理OCR任务...")

    try:
        outcome = ocr_with_onnxocr(image_path, text_mask_path, output_dir, config_json)
        # Kept inside the try so a malformed result is reported as a failure too.
        print(f"[SUCCESS] OCR识别完成,共识别 {outcome['total_dialogues']} 个文字块")
    except Exception as err:
        print(f"[ERROR] 程序执行失败: {err}")
        exit_status = 1
    else:
        exit_status = 0

    sys.exit(exit_status)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|