draw_text_blocks.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
# -*- coding: utf-8 -*-
"""
Draw green outline boxes around text-block regions on an image.
The regions to mark are taken from the bbox entries of an OCR JSON file.
"""
  6. import sys
  7. import json
  8. import cv2
  9. import numpy as np
  10. from pathlib import Path
  11. # Windows编码修复
  12. if sys.platform == 'win32':
  13. import io
  14. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
  15. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
  16. def draw_text_blocks(image_path, json_path, output_path, line_color=(0, 255, 0), line_thickness=2):
  17. """
  18. 在图片上绘制文字块区域的绿色线框
  19. 参数:
  20. image_path: 图片路径
  21. json_path: OCR JSON文件路径(包含bbox信息)
  22. output_path: 输出图片路径
  23. line_color: 线框颜色(BGR格式,默认绿色 (0, 255, 0))
  24. line_thickness: 线框粗细(默认2像素)
  25. 返回:
  26. 成功返回True,失败返回False
  27. """
  28. try:
  29. # 读取图片(处理中文路径)
  30. img_array = np.fromfile(str(image_path), dtype=np.uint8)
  31. img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
  32. if img is None:
  33. print(f"[ERROR] 无法读取图片: {image_path}")
  34. return False
  35. print(f"[INFO] 读取图片: {Path(image_path).name}")
  36. print(f"[INFO] 图片尺寸: {img.shape[1]}x{img.shape[0]}")
  37. # 读取JSON文件
  38. with open(json_path, 'r', encoding='utf-8') as f:
  39. json_data = json.load(f)
  40. print(f"[INFO] 读取JSON文件: {Path(json_path).name}")
  41. # 提取文字块bbox信息
  42. text_blocks = []
  43. # 支持两种JSON格式:
  44. # 1. dialogues格式:{ "dialogues": [{"bbox": {...}, ...}, ...] }
  45. # 2. text_blocks格式:{ "text_blocks": [{"bbox": {...}, ...}, ...] }
  46. if 'dialogues' in json_data:
  47. text_blocks = json_data['dialogues']
  48. print(f"[INFO] 检测到 {len(text_blocks)} 个对话区域")
  49. elif 'text_blocks' in json_data:
  50. text_blocks = json_data['text_blocks']
  51. print(f"[INFO] 检测到 {len(text_blocks)} 个文字块区域")
  52. else:
  53. print(f"[WARN] JSON文件中未找到 'dialogues' 或 'text_blocks' 字段")
  54. # 尝试直接使用根级别的数组
  55. if isinstance(json_data, list):
  56. text_blocks = json_data
  57. print(f"[INFO] 使用根级别数组,检测到 {len(text_blocks)} 个区域")
  58. else:
  59. print(f"[ERROR] 无法解析JSON文件格式")
  60. return False
  61. # 在图片上绘制绿色矩形框
  62. drawn_count = 0
  63. for i, block in enumerate(text_blocks):
  64. bbox = block.get('bbox')
  65. if not bbox:
  66. continue
  67. # 支持多种bbox格式:
  68. # 1. { "x1": int, "y1": int, "x2": int, "y2": int }
  69. # 2. { "x": int, "y": int, "width": int, "height": int }
  70. # 3. [x1, y1, x2, y2] 数组格式
  71. # 4. { "xyxy": [x1, y1, x2, y2] }
  72. x1, y1, x2, y2 = None, None, None, None
  73. if isinstance(bbox, dict):
  74. if 'x1' in bbox and 'y1' in bbox and 'x2' in bbox and 'y2' in bbox:
  75. x1, y1, x2, y2 = int(bbox['x1']), int(bbox['y1']), int(bbox['x2']), int(bbox['y2'])
  76. elif 'x' in bbox and 'y' in bbox and 'width' in bbox and 'height' in bbox:
  77. x1 = int(bbox['x'])
  78. y1 = int(bbox['y'])
  79. x2 = int(bbox['x'] + bbox['width'])
  80. y2 = int(bbox['y'] + bbox['height'])
  81. elif 'xyxy' in bbox and isinstance(bbox['xyxy'], list) and len(bbox['xyxy']) >= 4:
  82. x1, y1, x2, y2 = int(bbox['xyxy'][0]), int(bbox['xyxy'][1]), int(bbox['xyxy'][2]), int(bbox['xyxy'][3])
  83. elif isinstance(bbox, list) and len(bbox) >= 4:
  84. x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
  85. if x1 is not None and y1 is not None and x2 is not None and y2 is not None:
  86. # 确保坐标在图片范围内
  87. x1 = max(0, min(x1, img.shape[1] - 1))
  88. y1 = max(0, min(y1, img.shape[0] - 1))
  89. x2 = max(0, min(x2, img.shape[1] - 1))
  90. y2 = max(0, min(y2, img.shape[0] - 1))
  91. # 确保 x1 < x2, y1 < y2
  92. if x1 >= x2 or y1 >= y2:
  93. print(f" [WARN] 文字块 {i + 1} 的bbox无效: ({x1}, {y1}) -> ({x2}, {y2}),跳过")
  94. continue
  95. # 绘制绿色矩形框
  96. cv2.rectangle(img, (x1, y1), (x2, y2), line_color, line_thickness)
  97. # 可选:在框上标注序号
  98. label = str(i + 1)
  99. font = cv2.FONT_HERSHEY_SIMPLEX
  100. font_scale = 0.6
  101. font_thickness = 1
  102. (text_width, text_height), baseline = cv2.getTextSize(label, font, font_scale, font_thickness)
  103. # 在左上角绘制文字背景(白色矩形)
  104. cv2.rectangle(img, (x1, y1 - text_height - baseline - 2),
  105. (x1 + text_width, y1), (255, 255, 255), -1)
  106. # 绘制文字(绿色)
  107. cv2.putText(img, label, (x1, y1 - baseline - 2),
  108. font, font_scale, line_color, font_thickness, cv2.LINE_AA)
  109. drawn_count += 1
  110. else:
  111. print(f" [WARN] 文字块 {i + 1} 的bbox格式不支持,跳过")
  112. print(f"[INFO] 成功绘制 {drawn_count} 个文字块区域")
  113. # 保存图片(处理中文路径)
  114. success, encoded_img = cv2.imencode('.png', img)
  115. if success:
  116. encoded_img.tofile(str(output_path))
  117. print(f"[OK] 已保存标记后的图片: {output_path}")
  118. return True
  119. else:
  120. print(f"[ERROR] 保存图片失败: {output_path}")
  121. return False
  122. except Exception as e:
  123. print(f"[ERROR] 处理失败: {e}")
  124. import traceback
  125. traceback.print_exc()
  126. return False
  127. if __name__ == '__main__':
  128. if len(sys.argv) < 4:
  129. print("用法: python draw_text_blocks.py <image_path> <json_path> <output_path> [line_thickness]")
  130. print("示例: python draw_text_blocks.py image.png dialogues.json output.png 2")
  131. sys.exit(1)
  132. image_path = sys.argv[1]
  133. json_path = sys.argv[2]
  134. output_path = sys.argv[3]
  135. line_thickness = int(sys.argv[4]) if len(sys.argv) > 4 else 2
  136. success = draw_text_blocks(image_path, json_path, output_path,
  137. line_color=(0, 255, 0), line_thickness=line_thickness)
  138. sys.exit(0 if success else 1)