| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- # -*- coding: utf-8 -*-
- """
- 在图片上绘制文字块区域的绿色线框
- 根据OCR JSON文件中的bbox信息,在图片上标记文字区域
- """
- import sys
- import json
- import cv2
- import numpy as np
- from pathlib import Path
# Windows encoding fix: force UTF-8 on stdout/stderr so the Chinese log
# messages printed by this script are not mangled or rejected by the console
# code page.
if sys.platform == 'win32':
    import io

    for _stream_name in ('stdout', 'stderr'):
        _stream = getattr(sys, _stream_name, None)
        if hasattr(_stream, 'reconfigure'):
            # Switch the encoding in place; settings that are not passed
            # (such as line buffering) keep their current values.
            _stream.reconfigure(encoding='utf-8', errors='replace')
        elif hasattr(_stream, 'buffer'):
            # Older stream objects without reconfigure(): wrap the raw buffer.
            # Line buffering is requested so messages still appear promptly.
            setattr(sys, _stream_name,
                    io.TextIOWrapper(_stream.buffer, encoding='utf-8',
                                     errors='replace', line_buffering=True))
        # A stream that is None or has no .buffer is left untouched
        # instead of raising AttributeError at import time.
def _bbox_to_xyxy(bbox):
    """Normalize one bbox value to an ``(x1, y1, x2, y2)`` tuple of ints.

    Supported shapes:
        1. ``{"x1": .., "y1": .., "x2": .., "y2": ..}``
        2. ``{"x": .., "y": .., "width": .., "height": ..}``
        3. ``[x1, y1, x2, y2]`` (items after the fourth are ignored)
        4. ``{"xyxy": [x1, y1, x2, y2]}``

    Returns:
        The four integer coordinates, or None when the shape is not
        recognised or a coordinate cannot be converted to int.
    """
    try:
        if isinstance(bbox, dict):
            if all(key in bbox for key in ('x1', 'y1', 'x2', 'y2')):
                coords = (bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2'])
            elif all(key in bbox for key in ('x', 'y', 'width', 'height')):
                coords = (bbox['x'], bbox['y'],
                          bbox['x'] + bbox['width'], bbox['y'] + bbox['height'])
            elif isinstance(bbox.get('xyxy'), list) and len(bbox['xyxy']) >= 4:
                coords = bbox['xyxy'][:4]
            else:
                return None
        elif isinstance(bbox, list) and len(bbox) >= 4:
            coords = bbox[:4]
        else:
            return None
        return tuple(int(value) for value in coords)
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric strings, NaN/Infinity, etc. -> unsupported bbox.
        return None


def draw_text_blocks(image_path, json_path, output_path, line_color=(0, 255, 0), line_thickness=2):
    """Draw a rectangle and a 1-based index label around every text block.

    The bounding boxes are read from an OCR JSON file and drawn onto a copy
    of the image, which is then written to ``output_path``.

    Args:
        image_path: Path of the input image.
        json_path: Path of the OCR JSON file. Accepted layouts are
            ``{"dialogues": [...]}``, ``{"text_blocks": [...]}`` or a
            root-level array. Each entry is either an object with a ``bbox``
            field or, for convenience, a bare ``[x1, y1, x2, y2]`` list.
        output_path: Path of the annotated image. The file extension selects
            the encoder; PNG is used when the extension is missing or not
            supported by OpenCV.
        line_color: Box/label colour in BGR order (default green).
        line_thickness: Box line thickness in pixels (default 2).

    Returns:
        True on success, False on any failure (the error is printed).
    """
    try:
        # Read through np.fromfile + imdecode so Chinese (non-ASCII) paths work.
        img_array = np.fromfile(str(image_path), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

        if img is None:
            print(f"[ERROR] 无法读取图片: {image_path}")
            return False

        img_height, img_width = img.shape[:2]
        print(f"[INFO] 读取图片: {Path(image_path).name}")
        print(f"[INFO] 图片尺寸: {img_width}x{img_height}")

        with open(json_path, 'r', encoding='utf-8') as f:
            json_data = json.load(f)

        print(f"[INFO] 读取JSON文件: {Path(json_path).name}")

        # Locate the list of blocks in one of the supported JSON layouts.
        if isinstance(json_data, dict) and 'dialogues' in json_data:
            text_blocks = json_data['dialogues']
            print(f"[INFO] 检测到 {len(text_blocks)} 个对话区域")
        elif isinstance(json_data, dict) and 'text_blocks' in json_data:
            text_blocks = json_data['text_blocks']
            print(f"[INFO] 检测到 {len(text_blocks)} 个文字块区域")
        else:
            print("[WARN] JSON文件中未找到 'dialogues' 或 'text_blocks' 字段")
            # Fall back to a root-level array of blocks.
            if not isinstance(json_data, list):
                print("[ERROR] 无法解析JSON文件格式")
                return False
            text_blocks = json_data
            print(f"[INFO] 使用根级别数组,检测到 {len(text_blocks)} 个区域")

        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.6
        font_thickness = 1
        max_x = img_width - 1
        max_y = img_height - 1

        drawn_count = 0
        for i, block in enumerate(text_blocks):
            # A non-dict entry is treated as the bbox itself rather than
            # crashing on .get(); unsupported values are reported below.
            bbox = block.get('bbox') if isinstance(block, dict) else block
            if not bbox:
                continue

            coords = _bbox_to_xyxy(bbox)
            if coords is None:
                print(f" [WARN] 文字块 {i + 1} 的bbox格式不支持,跳过")
                continue

            # Clamp the box to the image area.
            x1 = max(0, min(coords[0], max_x))
            y1 = max(0, min(coords[1], max_y))
            x2 = max(0, min(coords[2], max_x))
            y2 = max(0, min(coords[3], max_y))

            # Require a non-degenerate box with x1 < x2 and y1 < y2.
            if x1 >= x2 or y1 >= y2:
                print(f" [WARN] 文字块 {i + 1} 的bbox无效: ({x1}, {y1}) -> ({x2}, {y2}),跳过")
                continue

            cv2.rectangle(img, (x1, y1), (x2, y2), line_color, line_thickness)

            # Index label on a white background at the box's top-left corner.
            label = str(i + 1)
            (text_width, text_height), baseline = cv2.getTextSize(
                label, font, font_scale, font_thickness)
            label_height = text_height + baseline + 2

            # Normally the label sits just above the box. When the box is so
            # close to the top edge that the label would fall outside the
            # image, draw it just inside the box instead so it stays visible.
            label_bottom = y1 if y1 - label_height >= 0 else y1 + label_height

            cv2.rectangle(img, (x1, label_bottom - label_height),
                          (x1 + text_width, label_bottom), (255, 255, 255), -1)
            cv2.putText(img, label, (x1, label_bottom - baseline - 2),
                        font, font_scale, line_color, font_thickness, cv2.LINE_AA)

            drawn_count += 1

        print(f"[INFO] 成功绘制 {drawn_count} 个文字块区域")

        # Encode in memory and write with tofile() so Chinese (non-ASCII)
        # output paths work. The encoder follows the output extension.
        extension = Path(output_path).suffix.lower() or '.png'
        try:
            success, encoded_img = cv2.imencode(extension, img)
        except cv2.error:
            if extension == '.png':
                raise
            # No encoder for this extension: fall back to PNG data.
            success, encoded_img = cv2.imencode('.png', img)

        if not success:
            print(f"[ERROR] 保存图片失败: {output_path}")
            return False

        encoded_img.tofile(str(output_path))
        print(f"[OK] 已保存标记后的图片: {output_path}")
        return True

    except Exception as e:
        # Top-level boundary: report the failure and signal it via the return value.
        print(f"[ERROR] 处理失败: {e}")
        import traceback
        traceback.print_exc()
        return False
def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list without the program name, in the order
            ``<image_path> <json_path> <output_path> [line_thickness]``.
            Defaults to ``sys.argv[1:]``.

    Returns:
        Process exit code: 0 when the annotated image was written, 1 on a
        usage error, an invalid thickness, or a processing failure.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if len(args) < 3:
        print("用法: python draw_text_blocks.py <image_path> <json_path> <output_path> [line_thickness]")
        print("示例: python draw_text_blocks.py image.png dialogues.json output.png 2")
        return 1

    image_path, json_path, output_path = args[:3]

    line_thickness = 2
    if len(args) > 3:
        try:
            line_thickness = int(args[3])
        except ValueError:
            # Report a bad argument instead of dying with a raw traceback.
            print(f"[ERROR] line_thickness 必须是整数: {args[3]}")
            return 1

    success = draw_text_blocks(image_path, json_path, output_path,
                               line_color=(0, 255, 0), line_thickness=line_thickness)
    return 0 if success else 1


if __name__ == '__main__':
    sys.exit(main())
|