"""OnnxOCR multi-mode text recognition command-line script."""
- import sys
- import os
- from pathlib import Path
- import cv2
- import numpy as np
- import json
- import time
- import argparse
# Make sure both standard streams emit UTF-8 (the log messages below are Chinese).
sys.stdout.reconfigure(encoding='utf-8')
sys.stderr.reconfigure(encoding='utf-8')

# Locate the local OnnxOCR checkout: three directory levels above this file,
# then python/OnnxOCR-main.
project_root = Path(__file__).parent.parent.parent
onnxocr_path = project_root / 'python' / 'OnnxOCR-main'
if not onnxocr_path.exists():
    # Without the local checkout nothing below can work, so stop right away.
    print(f"[ERROR] 未找到本地OnnxOCR路径: {onnxocr_path}")
    sys.exit(1)

# Put the checkout first on sys.path so the `onnxocr` import below resolves to it.
sys.path.insert(0, str(onnxocr_path))
print(f"[INFO] 使用本地OnnxOCR路径: {onnxocr_path}")

try:
    from onnxocr.onnx_paddleocr import ONNXPaddleOcr
except ImportError as import_err:
    print(f"[ERROR] 无法导入OnnxOCR模块: {import_err}")
    ONNXOCR_AVAILABLE = False
    sys.exit(1)
else:
    ONNXOCR_AVAILABLE = True
# Description of each supported OCR mode; written into the JSON report and
# used to reject unsupported modes before any expensive work is done.
_MODE_DESCRIPTIONS = {
    "full": "完整OCR:检测+识别+角度分类",
    "detect": "仅检测:只检测区域坐标,不识别文字",
    "fast": "快速检测:直接使用检测器,最快速度",
}


def _bbox_to_list(bbox):
    """Return *bbox* as plain lists so that ``json.dump`` can serialize it."""
    return bbox.tolist() if hasattr(bbox, "tolist") else bbox


def _first_page(ocr_output):
    """Return the first element of an OCR result, or ``[]`` when it is missing or empty."""
    # Use explicit None/len checks: truth-testing a numpy array would raise.
    if ocr_output is None or len(ocr_output) == 0:
        return []
    page = ocr_output[0]
    if page is None or len(page) == 0:
        return []
    return page


def _collect_dialogues(ocr_engine, img, mode):
    """Run *ocr_engine* on *img* in the given *mode* and return a list of region dicts."""
    if mode == "full":
        # Full OCR: detection + recognition + angle classification.
        print("[INFO] 执行完整OCR识别(检测+识别+角度分类)...")
        ocr_result = ocr_engine.ocr(img, det=True, rec=True, cls=True)
        return [
            {
                "bbox": _bbox_to_list(bbox),
                "text": text,
                "confidence": float(confidence),
                "mode": "full_ocr",
            }
            for bbox, (text, confidence) in _first_page(ocr_result)
        ]

    if mode == "detect":
        # Detection only: region coordinates, no text recognition.
        print("[INFO] 执行文字区域检测(仅坐标,不识别文字)...")
        detection_result = ocr_engine.ocr(img, det=True, rec=False, cls=False)
        return [
            {
                "bbox": _bbox_to_list(bbox),
                "text": f"[区域{index}]",  # placeholder text
                "confidence": 1.0,
                "mode": "detection_only",
            }
            for index, bbox in enumerate(_first_page(detection_result), start=1)
        ]

    if mode == "fast":
        # Fast detection: call the engine's text detector directly.
        print("[INFO] 执行快速文字检测(直接检测器)...")
        dt_boxes = ocr_engine.text_detector(img)
        if dt_boxes is None or len(dt_boxes) == 0:
            return []
        return [
            {
                "bbox": _bbox_to_list(bbox),  # numpy array -> list
                "text": f"[快速检测{index}]",  # placeholder text
                "confidence": 1.0,
                "mode": "fast_detection",
            }
            for index, bbox in enumerate(dt_boxes, start=1)
        ]

    raise ValueError(f"不支持的模式: {mode}")


def _print_preview(dialogues, mode, limit=5):
    """Print a short preview of at most *limit* entries of *dialogues*."""
    print("[INFO] 识别结果预览:")
    for index, entry in enumerate(dialogues[:limit], start=1):
        if mode == "full":
            print(f" {index}. '{entry['text']}' (置信度: {entry['confidence']:.3f})")
            continue
        bbox = entry["bbox"]
        if bbox and isinstance(bbox[0], list):  # polygon format
            points = np.array(bbox)
            xs, ys = points[:, 0], points[:, 1]
            center_x = np.mean(xs)
            center_y = np.mean(ys)
            width = np.max(xs) - np.min(xs)
            height = np.max(ys) - np.min(ys)
            print(f" {index}. 区域中心({center_x:.0f},{center_y:.0f}) 尺寸({width:.0f}x{height:.0f})")

    if len(dialogues) > limit:
        print(f" ... 还有 {len(dialogues) - limit} 个区域")


def ocr_with_onnxocr_modes(image_path, text_mask_path, output_dir, mode="full"):
    """Run OnnxOCR on an image in the selected mode and save the result as JSON.

    The report is written to ``<output_dir>/<image stem>_dialogues_<mode>.json``.

    Args:
        image_path: Path of the input image.
        text_mask_path: Path of a text mask (may be empty). Accepted for
            interface compatibility; this function does not read it.
        output_dir: Directory that receives the JSON report; created if missing.
        mode: OCR mode - "full" (detection + recognition + angle
            classification), "detect" (region detection only) or "fast"
            (call the text detector directly).

    Returns:
        A dict with ``json_path``, ``total_count``, ``mode`` and
        ``processing_time`` (seconds spent in the OCR step) on success, or
        ``None`` when OnnxOCR is unavailable or any processing step fails.
    """
    if not ONNXOCR_AVAILABLE:
        print("[ERROR] OnnxOCR 不可用")
        return None

    # Create the output directory.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"[INFO] OCR模式: {mode}")
    print(f"[INFO] 输入图片: {image_path}")
    print(f"[INFO] 输出目录: {output_dir}")

    try:
        # Fail fast: reject an unsupported mode before the costly model
        # initialization and image decode.
        if mode not in _MODE_DESCRIPTIONS:
            raise ValueError(f"不支持的模式: {mode}")

        # Initialize OnnxOCR.
        print("[INFO] 初始化OnnxOCR...")
        start_init = time.perf_counter()
        onnxocr_instance = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)
        # Keep the elapsed initialization time; the report must contain a
        # duration, not the raw start timestamp.
        init_elapsed = time.perf_counter() - start_init
        print(f"[INFO] OnnxOCR 初始化完成 ({init_elapsed:.2f}秒)")

        # Read the raw bytes with numpy and decode them with OpenCV.
        print(f"[INFO] 读取图片: {image_path}")
        img_array = np.fromfile(str(image_path), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError(f"无法读取图片: {image_path}")
        print(f"[INFO] 图片读取成功,尺寸: {img.shape}")

        # Run the OCR operation that matches the requested mode.
        start_ocr = time.perf_counter()
        dialogues = _collect_dialogues(onnxocr_instance, img, mode)
        ocr_elapsed = time.perf_counter() - start_ocr
        print(f"[INFO] OCR处理完成 ({ocr_elapsed:.2f}秒)")
        print(f"[INFO] 检测到 {len(dialogues)} 个文字区域")

        # Save the result to a JSON file.
        image_name = Path(image_path).stem
        output_json_path = output_dir / f"{image_name}_dialogues_{mode}.json"

        result_data = {
            "dialogues": dialogues,
            "total_dialogues": len(dialogues),
            "image_path": str(image_path),
            "ocr_engine": "OnnxOCR",
            "ocr_mode": mode,
            "processing_time": {
                "initialization": f"{init_elapsed:.2f}s",
                "ocr_processing": f"{ocr_elapsed:.2f}s",
                "total": f"{time.perf_counter() - start_init:.2f}s",
            },
            "performance_info": {
                "detected_regions": len(dialogues),
                "mode_description": _MODE_DESCRIPTIONS.get(mode, "未知模式"),
            },
        }

        with open(output_json_path, 'w', encoding='utf-8') as f:
            json.dump(result_data, f, ensure_ascii=False, indent=2)

        print(f"[INFO] 结果已保存到: {output_json_path}")

        _print_preview(dialogues, mode)

        print(f"[SUCCESS] OCR识别完成,共处理 {len(dialogues)} 个区域")

        return {
            "json_path": str(output_json_path),
            "total_count": len(dialogues),
            "mode": mode,
            "processing_time": ocr_elapsed,
        }

    except Exception as e:
        # Top-level boundary for the whole pipeline: report the failure with a
        # traceback and signal it to the caller through a None return.
        print(f"[ERROR] OCR处理失败: {e}")
        import traceback
        traceback.print_exc()
        return None
if __name__ == '__main__':
    # Command-line interface: image path, optional mask path, output
    # directory and the OCR mode switch.
    cli = argparse.ArgumentParser(description='OnnxOCR多模式文字识别')
    cli.add_argument('image_path', help='输入图片路径')
    cli.add_argument('text_mask_path', nargs='?', default='', help='文字遮罩路径(可选)')
    cli.add_argument('output_dir', help='输出目录')
    cli.add_argument(
        '--mode',
        choices=['full', 'detect', 'fast'],
        default='full',
        help='OCR模式:full(完整OCR), detect(仅检测), fast(快速检测)',
    )
    opts = cli.parse_args()

    print("[DEBUG] 开始OCR处理...")
    print(f"[DEBUG] 参数: 图片={opts.image_path}, 模式={opts.mode}")

    outcome = ocr_with_onnxocr_modes(
        opts.image_path,
        opts.text_mask_path,
        opts.output_dir,
        opts.mode,
    )

    # A falsy outcome means the OCR run failed: report it and exit non-zero.
    if not outcome:
        print("[ERROR] 处理失败")
        sys.exit(1)

    print(f"[SUCCESS] 处理完成: {outcome}")
|