# -*- coding: utf-8 -*-
"""
Draw green outlines around text-block regions on an image.

The regions are taken from the ``bbox`` entries of an OCR JSON file and are
marked (with a 1-based index label) on a copy of the image.
"""

import io
import json
import sys
import traceback
from pathlib import Path

import cv2
import numpy as np

# Windows encoding fix: re-wrap the standard streams as UTF-8 so the Chinese
# log messages below can be printed; unencodable characters are replaced.
if sys.platform == 'win32':
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')


def _extract_text_blocks(json_data):
    """Return the list of block entries found in *json_data*, or None.

    Supported layouts:
      1. ``{"dialogues": [{"bbox": ...}, ...]}``
      2. ``{"text_blocks": [{"bbox": ...}, ...]}``
      3. a root-level array of entries

    ``dialogues`` takes precedence over ``text_blocks`` when both exist.
    None is returned when no list of entries can be located.
    """
    if isinstance(json_data, dict):
        if 'dialogues' in json_data:
            blocks = json_data['dialogues']
            if not isinstance(blocks, list):
                return None
            print(f"[INFO] 检测到 {len(blocks)} 个对话区域")
            return blocks
        if 'text_blocks' in json_data:
            blocks = json_data['text_blocks']
            if not isinstance(blocks, list):
                return None
            print(f"[INFO] 检测到 {len(blocks)} 个文字块区域")
            return blocks

    print("[WARN] JSON文件中未找到 'dialogues' 或 'text_blocks' 字段")
    # Fall back to a root-level array.
    if isinstance(json_data, list):
        print(f"[INFO] 使用根级别数组,检测到 {len(json_data)} 个区域")
        return json_data
    return None


def _parse_bbox(bbox):
    """Convert *bbox* to an ``(x1, y1, x2, y2)`` tuple of ints, or None.

    Supported formats:
      1. ``{"x1": ..., "y1": ..., "x2": ..., "y2": ...}``
      2. ``{"x": ..., "y": ..., "width": ..., "height": ...}``
      3. ``[x1, y1, x2, y2]``
      4. ``{"xyxy": [x1, y1, x2, y2]}``

    None is returned for an unrecognised layout or when a value cannot be
    interpreted as a finite number, so one bad entry does not abort the run.
    """
    try:
        if isinstance(bbox, dict):
            if all(key in bbox for key in ('x1', 'y1', 'x2', 'y2')):
                return tuple(int(float(bbox[key])) for key in ('x1', 'y1', 'x2', 'y2'))
            if all(key in bbox for key in ('x', 'y', 'width', 'height')):
                # Convert before adding: numeric strings would otherwise be
                # concatenated instead of summed.
                x = float(bbox['x'])
                y = float(bbox['y'])
                w = float(bbox['width'])
                h = float(bbox['height'])
                return int(x), int(y), int(x + w), int(y + h)
            xyxy = bbox.get('xyxy')
            if isinstance(xyxy, (list, tuple)) and len(xyxy) >= 4:
                return tuple(int(float(v)) for v in xyxy[:4])
        elif isinstance(bbox, (list, tuple)) and len(bbox) >= 4:
            return tuple(int(float(v)) for v in bbox[:4])
    except (TypeError, ValueError, OverflowError):
        # Non-numeric, NaN or infinite coordinate.
        return None
    return None


def _draw_index_label(img, label, x1, y1, color):
    """Draw *label* on a white background at the top-left corner of a box.

    The label sits directly above the box; when the box touches the top of
    the image (so the label would fall at negative y), it is drawn just
    inside the box instead so that it stays visible.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    font_thickness = 1
    (text_width, text_height), baseline = cv2.getTextSize(
        label, font, font_scale, font_thickness)

    label_height = text_height + baseline + 2
    top = y1 - label_height
    if top < 0:
        top = y1
    bottom = top + label_height

    # White filled rectangle as the text background.
    cv2.rectangle(img, (x1, top), (x1 + text_width, bottom), (255, 255, 255), -1)
    # Label text in the outline colour.
    cv2.putText(img, label, (x1, bottom - baseline - 2),
                font, font_scale, color, font_thickness, cv2.LINE_AA)


def draw_text_blocks(image_path, json_path, output_path, line_color=(0, 255, 0), line_thickness=2):
    """
    Draw outlines around the text-block regions of an image.

    Args:
        image_path: Path of the input image.
        json_path: Path of the OCR JSON file containing the bbox entries.
        output_path: Path the annotated image is written to. The data is
            always PNG-encoded, whatever extension the path has.
        line_color: Outline colour in BGR order (default green ``(0, 255, 0)``).
        line_thickness: Outline thickness in pixels (default 2).

    Returns:
        True on success, False on failure. Errors are printed rather than
        raised.
    """
    try:
        # Read through NumPy + imdecode so non-ASCII (e.g. Chinese) paths work.
        img_array = np.fromfile(str(image_path), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

        if img is None:
            print(f"[ERROR] 无法读取图片: {image_path}")
            return False

        height, width = img.shape[:2]
        print(f"[INFO] 读取图片: {Path(image_path).name}")
        print(f"[INFO] 图片尺寸: {width}x{height}")

        with open(json_path, 'r', encoding='utf-8') as f:
            json_data = json.load(f)

        print(f"[INFO] 读取JSON文件: {Path(json_path).name}")

        text_blocks = _extract_text_blocks(json_data)
        if text_blocks is None:
            print("[ERROR] 无法解析JSON文件格式")
            return False

        drawn_count = 0
        for i, block in enumerate(text_blocks):
            # Entries that are not objects, or have no bbox, are skipped.
            bbox = block.get('bbox') if isinstance(block, dict) else None
            if not bbox:
                continue

            coords = _parse_bbox(bbox)
            if coords is None:
                print(f"  [WARN] 文字块 {i + 1} 的bbox格式不支持,跳过")
                continue

            # Clamp the coordinates to the image bounds.
            x1, y1, x2, y2 = coords
            x1 = max(0, min(x1, width - 1))
            y1 = max(0, min(y1, height - 1))
            x2 = max(0, min(x2, width - 1))
            y2 = max(0, min(y2, height - 1))

            # Require a non-degenerate box: x1 < x2 and y1 < y2.
            if x1 >= x2 or y1 >= y2:
                print(f"  [WARN] 文字块 {i + 1} 的bbox无效: ({x1}, {y1}) -> ({x2}, {y2}),跳过")
                continue

            cv2.rectangle(img, (x1, y1), (x2, y2), line_color, line_thickness)
            # Tag the box with its 1-based index.
            _draw_index_label(img, str(i + 1), x1, y1, line_color)
            drawn_count += 1

        print(f"[INFO] 成功绘制 {drawn_count} 个文字块区域")

        # Write through imencode + tofile so non-ASCII paths work.
        success, encoded_img = cv2.imencode('.png', img)
        if success:
            encoded_img.tofile(str(output_path))
            print(f"[OK] 已保存标记后的图片: {output_path}")
            return True

        print(f"[ERROR] 保存图片失败: {output_path}")
        return False

    except Exception as e:
        # Top-level boundary of the tool: report the failure and signal it
        # through the return value instead of propagating.
        print(f"[ERROR] 处理失败: {e}")
        traceback.print_exc()
        return False


def main(argv=None):
    """Command-line entry point; returns the process exit code (0 or 1)."""
    argv = sys.argv if argv is None else argv

    if len(argv) < 4:
        print("用法: python draw_text_blocks.py <image_path> <json_path> <output_path> [line_thickness]")
        print("示例: python draw_text_blocks.py image.png dialogues.json output.png 2")
        return 1

    image_path = argv[1]
    json_path = argv[2]
    output_path = argv[3]

    try:
        line_thickness = int(argv[4]) if len(argv) > 4 else 2
    except ValueError:
        print(f"[ERROR] line_thickness 必须是整数: {argv[4]}")
        return 1

    success = draw_text_blocks(image_path, json_path, output_path,
                               line_color=(0, 255, 0), line_thickness=line_thickness)
    return 0 if success else 1


if __name__ == '__main__':
    sys.exit(main())