draw_text_regions.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. import cv2
  2. import numpy as np
  3. import json
  4. import sys
  5. from pathlib import Path
  6. def calculate_box_distance(box1, box2):
  7. """
  8. 计算两个边界框之间的最短距离
  9. Args:
  10. box1, box2: 边界框 (x1, y1, x2, y2)
  11. Returns:
  12. 最短距离(像素)
  13. """
  14. x1_1, y1_1, x2_1, y2_1 = box1
  15. x1_2, y1_2, x2_2, y2_2 = box2
  16. # 计算中心点
  17. center1_x = (x1_1 + x2_1) / 2
  18. center1_y = (y1_1 + y2_1) / 2
  19. center2_x = (x1_2 + x2_2) / 2
  20. center2_y = (y1_2 + y2_2) / 2
  21. # 计算水平和垂直间距
  22. horizontal_gap = 0
  23. vertical_gap = 0
  24. # 水平间距
  25. if x2_1 < x1_2: # box1在box2左边
  26. horizontal_gap = x1_2 - x2_1
  27. elif x2_2 < x1_1: # box2在box1左边
  28. horizontal_gap = x1_1 - x2_2
  29. else: # 水平重叠
  30. horizontal_gap = 0
  31. # 垂直间距
  32. if y2_1 < y1_2: # box1在box2上方
  33. vertical_gap = y1_2 - y2_1
  34. elif y2_2 < y1_1: # box2在box1上方
  35. vertical_gap = y1_1 - y2_2
  36. else: # 垂直重叠
  37. vertical_gap = 0
  38. # 返回欧式距离
  39. return (horizontal_gap ** 2 + vertical_gap ** 2) ** 0.5
  40. def group_nearby_boxes(boxes, max_distance=50):
  41. """
  42. 基于距离将相近的边界框分组
  43. Args:
  44. boxes: 边界框列表 [(x1, y1, x2, y2), ...]
  45. max_distance: 最大合并距离(像素)
  46. Returns:
  47. 分组列表,每组包含相近的边界框
  48. """
  49. if not boxes:
  50. return []
  51. # 创建邻接表
  52. n = len(boxes)
  53. adjacent = [[] for _ in range(n)]
  54. # 计算每对边界框之间的距离
  55. for i in range(n):
  56. for j in range(i + 1, n):
  57. distance = calculate_box_distance(boxes[i], boxes[j])
  58. if distance <= max_distance:
  59. adjacent[i].append(j)
  60. adjacent[j].append(i)
  61. # 使用DFS分组连通的边界框
  62. visited = [False] * n
  63. groups = []
  64. def dfs(node, current_group):
  65. visited[node] = True
  66. current_group.append(node)
  67. for neighbor in adjacent[node]:
  68. if not visited[neighbor]:
  69. dfs(neighbor, current_group)
  70. for i in range(n):
  71. if not visited[i]:
  72. group = []
  73. dfs(i, group)
  74. groups.append([boxes[idx] for idx in group])
  75. return groups
  76. def calculate_merged_bbox(boxes):
  77. """
  78. 计算多个边界框的合并外接矩形
  79. Args:
  80. boxes: 边界框列表,每个元素为 (x1, y1, x2, y2)
  81. Returns:
  82. 合并后的边界框 (min_x1, min_y1, max_x2, max_y2)
  83. """
  84. if not boxes:
  85. return None
  86. min_x1 = min(box[0] for box in boxes)
  87. min_y1 = min(box[1] for box in boxes)
  88. max_x2 = max(box[2] for box in boxes)
  89. max_y2 = max(box[3] for box in boxes)
  90. return (min_x1, min_y1, max_x2, max_y2)
  91. def draw_text_regions_with_green_boxes(temp_json_path, output_image_path):
  92. try:
  93. # 确保输出编码为UTF-8
  94. sys.stdout.reconfigure(encoding='utf-8')
  95. sys.stderr.reconfigure(encoding='utf-8')
  96. print(f"[INFO] 读取文字区域数据: {temp_json_path}")
  97. with open(temp_json_path, 'r', encoding='utf-8') as f:
  98. data = json.load(f)
  99. original_image_path = data['image_path']
  100. text_regions = data['text_regions']
  101. print(f"[INFO] 读取原图片: {original_image_path}")
  102. # 读取原图片(支持中文路径)
  103. img_array = np.fromfile(str(original_image_path), dtype=np.uint8)
  104. img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
  105. if img is None:
  106. raise ValueError(f"无法读取图片: {original_image_path}")
  107. print(f"[INFO] 图片尺寸: {img.shape[:2][::-1]} (宽x高)")
  108. print(f"[INFO] 文字区域数量: {len(text_regions)}")
  109. # 绘制每个文字区域的绿色框
  110. valid_boxes = []
  111. for i, region in enumerate(text_regions):
  112. bbox = region.get('bbox') or region.get('green_box_coordinates')
  113. text = region.get('text', '')
  114. if bbox and all(k in bbox for k in ['x1', 'y1', 'x2', 'y2']):
  115. # 提取坐标
  116. x1, y1 = int(bbox['x1']), int(bbox['y1'])
  117. x2, y2 = int(bbox['x2']), int(bbox['y2'])
  118. # 保存有效的框坐标用于后续合并
  119. valid_boxes.append((x1, y1, x2, y2))
  120. # 绘制绿色矩形框(BGR格式,绿色为(0, 255, 0))
  121. cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
  122. # 可选:添加文字标签
  123. if text and len(text) > 0:
  124. # 在框的上方添加文字(如果空间足够)
  125. label_y = max(y1 - 5, 15)
  126. cv2.putText(img, f"{i+1}", (x1, label_y),
  127. cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
  128. # 基于距离智能分组并绘制合并边框
  129. if valid_boxes:
  130. print(f"[INFO] 对 {len(valid_boxes)} 个文字区域进行距离分析...")
  131. # 设置合并距离阈值(可调节)
  132. merge_distance = 80 # 像素
  133. print(f"[INFO] 合并距离阈值: {merge_distance} 像素")
  134. # 将相近的边界框分组
  135. groups = group_nearby_boxes(valid_boxes, merge_distance)
  136. print(f"[INFO] 分组结果: {len(groups)} 个文字群组")
  137. # 为每个分组绘制红色合并框
  138. colors = [(0, 0, 255), (255, 0, 0), (0, 255, 255), (255, 0, 255), (0, 165, 255)] # 红色为主,备用其他颜色
  139. for group_idx, group_boxes in enumerate(groups):
  140. if len(group_boxes) >= 2: # 只有包含2个或更多边界框的群组才绘制合并框
  141. merged_bbox = calculate_merged_bbox(group_boxes)
  142. if merged_bbox:
  143. mx1, my1, mx2, my2 = merged_bbox
  144. color = colors[group_idx % len(colors)]
  145. print(f"[INFO] 群组{group_idx+1}: {len(group_boxes)}个框 -> 合并坐标({mx1}, {my1}) 到 ({mx2}, {my2})")
  146. print(f"[INFO] 群组{group_idx+1}: 尺寸 {mx2-mx1}x{my2-my1}")
  147. # 绘制红色合并边框(加粗边框)
  148. cv2.rectangle(img, (mx1, my1), (mx2, my2), color, 4)
  149. # 在合并框上方添加标签
  150. label_text = f"群组{group_idx+1} ({len(group_boxes)}框)"
  151. label_y = max(my1 - 15, 25)
  152. cv2.putText(img, label_text, (mx1, label_y),
  153. cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
  154. else:
  155. print(f"[INFO] 群组{group_idx+1}: {len(group_boxes)}个框(单独框,不合并)")
  156. merged_groups = sum(1 for group in groups if len(group) >= 2)
  157. total_merged_boxes = sum(len(group) for group in groups if len(group) >= 2)
  158. print(f"[SUCCESS] 已绘制 {merged_groups} 个红色合并边框,覆盖 {total_merged_boxes} 个文字区域")
  159. # 保存结果图片
  160. output_path = Path(output_image_path)
  161. output_path.parent.mkdir(parents=True, exist_ok=True)
  162. # 使用cv2.imencode处理中文路径
  163. success, encoded_img = cv2.imencode('.png', img)
  164. if success:
  165. with open(str(output_path), 'wb') as f:
  166. f.write(encoded_img.tobytes())
  167. print(f"[OK] 绿色框图片已保存: {output_path}")
  168. else:
  169. raise ValueError("图片编码失败")
  170. print(f"[SUCCESS] 成功绘制 {len(text_regions)} 个文字区域的绿色框")
  171. except Exception as e:
  172. print(f"[ERROR] 绘制绿色框失败: {e}")
  173. import traceback
  174. traceback.print_exc()
  175. sys.exit(1)
  176. if __name__ == '__main__':
  177. if len(sys.argv) != 3:
  178. print("Usage: python draw_text_regions.py <temp_json_path> <output_image_path>")
  179. sys.exit(1)
  180. temp_json_file = sys.argv[1]
  181. output_img = sys.argv[2]
  182. draw_text_regions_with_green_boxes(temp_json_file, output_img)