ocr_with_onnxocr.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Script that performs OCR recognition using OnnxOCR.

Used as a replacement for PaddleOCR to work around crashes.
"""
import os
import sys
import json
import cv2
import numpy as np
from pathlib import Path
import warnings
# Suppress warnings. NOTE: this is a process-wide, blanket filter that takes
# effect as soon as the module is imported, so warnings raised by any library
# loaded afterwards are hidden as well.
warnings.filterwarnings('ignore')
  16. def ocr_with_onnxocr(image_path, text_mask_path, output_dir, config_json="{}"):
  17. """
  18. 使用OnnxOCR进行OCR识别
  19. Args:
  20. image_path: 输入图片路径
  21. text_mask_path: 文字遮罩图片路径(可为空)
  22. output_dir: 输出目录
  23. config_json: OnnxOCR配置参数(JSON字符串)
  24. Returns:
  25. dict: OCR结果
  26. """
  27. try:
  28. # 转换为Path对象以更好地处理中文路径
  29. image_path = Path(image_path)
  30. output_dir = Path(output_dir)
  31. print(f"[INFO] 开始OCR识别...")
  32. print(f"[INFO] 输入图片: {image_path}")
  33. print(f"[INFO] 输出目录: {output_dir}")
  34. # 确保输出目录存在
  35. output_dir.mkdir(parents=True, exist_ok=True)
  36. # 导入OnnxOCR
  37. onnxocr_path = Path(__file__).parent.parent / 'OnnxOCR-main'
  38. if onnxocr_path.exists():
  39. sys.path.insert(0, str(onnxocr_path))
  40. print(f"[INFO] 使用本地OnnxOCR路径: {onnxocr_path}")
  41. else:
  42. print(f"[WARN] 未找到本地OnnxOCR,尝试使用已安装的版本")
  43. from onnxocr.onnx_paddleocr import ONNXPaddleOcr
  44. # 解析配置参数
  45. try:
  46. config = json.loads(config_json) if config_json and config_json != '{}' else {}
  47. print(f"[INFO] 接收到配置参数: {len(config)} 个参数")
  48. if config:
  49. print(f"[INFO] 配置详情: {config}")
  50. except json.JSONDecodeError as e:
  51. print(f"[WARN] 配置参数解析失败: {e},使用默认配置")
  52. config = {}
  53. # 设置默认高精度配置
  54. default_config = {
  55. "use_angle_cls": True, # 启用角度分类器
  56. "use_gpu": False, # 使用CPU
  57. "det_db_thresh": 0.2, # 降低检测阈值(更敏感)
  58. "det_db_box_thresh": 0.5, # 降低框置信度阈值(检测更多)
  59. "det_limit_side_len": 1280, # 提高图片处理尺寸(更高精度)
  60. "drop_score": 0.3, # 降低识别置信度阈值(保留更多结果)
  61. "rec_image_shape": "3, 48, 320" # 保持模型兼容的图片尺寸
  62. }
  63. # 合并配置(用户配置覆盖默认配置)
  64. final_config = {**default_config, **config}
  65. # 初始化OnnxOCR
  66. print("[INFO] 初始化OnnxOCR...")
  67. print(f"[INFO] 最终配置: {final_config}")
  68. ocr_model = ONNXPaddleOcr(**final_config)
  69. print("[INFO] OnnxOCR 初始化成功")
  70. # 显示关键配置参数
  71. key_params = ['det_db_thresh', 'det_db_box_thresh', 'det_limit_side_len', 'drop_score', 'use_angle_cls']
  72. config_summary = {k: final_config.get(k, 'default') for k in key_params}
  73. print(f"[INFO] 关键参数: {config_summary}")
  74. # 读取图片
  75. print(f"[INFO] 读取图片: {image_path}")
  76. # 使用cv2.imdecode处理中文路径
  77. img_array = np.fromfile(str(image_path), dtype=np.uint8)
  78. img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
  79. if img is None:
  80. raise ValueError(f"无法读取图片: {image_path}")
  81. print(f"[INFO] 图片读取成功,尺寸: {img.shape}")
  82. # 执行OCR识别(明确启用角度分类器)
  83. print("[INFO] 正在进行OCR识别(启用角度分类器)...")
  84. ocr_result = ocr_model.ocr(img, det=True, rec=True, cls=True)
  85. if not ocr_result or not ocr_result[0]:
  86. print("[WARN] 未检测到文字")
  87. return {"dialogues": [], "total_dialogues": 0}
  88. # 转换结果格式为与原PaddleOCR脚本兼容的格式
  89. dialogues = []
  90. for detection in ocr_result[0]:
  91. bbox, (text, confidence) = detection
  92. # 转换边界框格式
  93. dialogue = {
  94. "bbox": bbox, # [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
  95. "text": text,
  96. "confidence": float(confidence)
  97. }
  98. dialogues.append(dialogue)
  99. print(f"[INFO] OCR识别完成,检测到 {len(dialogues)} 个文字块")
  100. # 保存结果到JSON文件(保持与原格式兼容)
  101. result_data = {
  102. "dialogues": dialogues,
  103. "total_dialogues": len(dialogues),
  104. "image_path": str(image_path),
  105. "ocr_engine": "OnnxOCR"
  106. }
  107. # 生成输出文件名(保持与原PaddleOCR脚本兼容的命名)
  108. image_name = image_path.stem
  109. json_output_path = output_dir / f"{image_name}_dialogues.json"
  110. with open(json_output_path, 'w', encoding='utf-8') as f:
  111. json.dump(result_data, f, ensure_ascii=False, indent=2)
  112. print(f"[INFO] OCR结果已保存到: {json_output_path}")
  113. # 打印部分结果用于调试
  114. print(f"[INFO] 识别结果预览:")
  115. for i, dialogue in enumerate(dialogues[:5]): # 只显示前5个
  116. print(f" {i+1}. {dialogue['text']} (置信度: {dialogue['confidence']:.3f})")
  117. if len(dialogues) > 5:
  118. print(f" ... 还有 {len(dialogues) - 5} 个文字块")
  119. return result_data
  120. except Exception as e:
  121. print(f"[ERROR] OCR识别失败: {e}")
  122. import traceback
  123. traceback.print_exc()
  124. raise
  125. def main():
  126. """主函数"""
  127. if len(sys.argv) < 3:
  128. print("用法: python ocr_with_onnxocr.py <image_path> <text_mask_path> <output_dir> [config_json]")
  129. print("示例:")
  130. print(' python ocr_with_onnxocr.py "image.png" "" "output/" \'{"det_db_thresh":0.1}\'')
  131. sys.exit(1)
  132. image_path = sys.argv[1]
  133. text_mask_path = sys.argv[2] if len(sys.argv) > 2 and sys.argv[2] else ""
  134. output_dir = sys.argv[3] if len(sys.argv) > 3 else "."
  135. config_json = sys.argv[4] if len(sys.argv) > 4 else "{}"
  136. print(f"[DEBUG] 开始处理OCR任务...")
  137. try:
  138. result = ocr_with_onnxocr(image_path, text_mask_path, output_dir, config_json)
  139. print(f"[SUCCESS] OCR识别完成,共识别 {result['total_dialogues']} 个文字块")
  140. sys.exit(0)
  141. except Exception as e:
  142. print(f"[ERROR] 程序执行失败: {e}")
  143. sys.exit(1)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()