| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234 |
- import cv2
- import numpy as np
- import json
- import sys
- from pathlib import Path
def calculate_box_distance(box1, box2):
    """Return the shortest distance, in pixels, between two bounding boxes.

    The distance is measured between the nearest edges of the two
    axis-aligned rectangles, not between their centers. Boxes that overlap
    or touch on an axis contribute a gap of 0 on that axis.

    Args:
        box1: Bounding box as ``(x1, y1, x2, y2)``.
        box2: Bounding box as ``(x1, y1, x2, y2)``.

    Returns:
        The Euclidean edge-to-edge distance as a float (``0.0`` when the
        boxes overlap or touch).
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Normalize the corner order so a box given as (right, bottom, left, top)
    # still produces the true edge-to-edge distance.
    a_left, a_right = min(ax1, ax2), max(ax1, ax2)
    a_top, a_bottom = min(ay1, ay2), max(ay1, ay2)
    b_left, b_right = min(bx1, bx2), max(bx1, bx2)
    b_top, b_bottom = min(by1, by2), max(by1, by2)

    # On each axis at most one of the two differences is positive; when the
    # projections overlap both are <= 0 and the gap collapses to 0.
    horizontal_gap = max(0, b_left - a_right, a_left - b_right)
    vertical_gap = max(0, b_top - a_bottom, a_top - b_bottom)

    return (horizontal_gap ** 2 + vertical_gap ** 2) ** 0.5
def group_nearby_boxes(boxes, max_distance=50):
    """Group bounding boxes that are close to each other.

    Two boxes are linked when ``calculate_box_distance`` reports a distance
    of at most ``max_distance``. Groups are the connected components of
    that link graph, so two boxes can share a group through a chain of
    intermediate boxes even if they are far apart themselves.

    Args:
        boxes: List of bounding boxes ``[(x1, y1, x2, y2), ...]``.
        max_distance: Largest distance, in pixels, at which two boxes are
            still linked.

    Returns:
        A list of groups; each group is a list of the boxes it contains.
        Returns ``[]`` when ``boxes`` is empty.
    """
    if not boxes:
        return []

    # Build the adjacency list by comparing every pair of boxes once.
    n = len(boxes)
    adjacent = [[] for _ in range(n)]
    for i in range(n):
        for j in range(i + 1, n):
            if calculate_box_distance(boxes[i], boxes[j]) <= max_distance:
                adjacent[i].append(j)
                adjacent[j].append(i)

    visited = [False] * n
    groups = []

    for start in range(n):
        if visited[start]:
            continue

        # Depth-first traversal with an explicit stack of neighbor iterators.
        # A recursive walk would go one frame deeper per box in a long chain
        # and could exceed the interpreter's recursion limit; this loop
        # visits nodes in the same pre-order without that risk.
        visited[start] = True
        members = [start]
        stack = [iter(adjacent[start])]
        while stack:
            for neighbor in stack[-1]:
                if not visited[neighbor]:
                    visited[neighbor] = True
                    members.append(neighbor)
                    stack.append(iter(adjacent[neighbor]))
                    break
            else:
                # Every neighbor of the node on top of the stack is done.
                stack.pop()

        groups.append([boxes[idx] for idx in members])

    return groups
def calculate_merged_bbox(boxes):
    """Compute the rectangle that encloses all of the given bounding boxes.

    Args:
        boxes: List of bounding boxes, each ``(x1, y1, x2, y2)``.

    Returns:
        ``(min_x1, min_y1, max_x2, max_y2)`` over all boxes, or ``None``
        when ``boxes`` is empty.
    """
    if not boxes:
        return None

    remaining = iter(boxes)
    seed = next(remaining)
    left, top, right, bottom = seed[0], seed[1], seed[2], seed[3]

    # Single pass: widen the running extremes with every further box.
    for candidate in remaining:
        if candidate[0] < left:
            left = candidate[0]
        if candidate[1] < top:
            top = candidate[1]
        if candidate[2] > right:
            right = candidate[2]
        if candidate[3] > bottom:
            bottom = candidate[3]

    return (left, top, right, bottom)
def draw_text_regions_with_green_boxes(temp_json_path, output_image_path, merge_distance=80):
    """Draw text-region boxes and merged group boxes onto an image.

    Reads a JSON file containing ``image_path`` and ``text_regions``, draws
    a green rectangle around every region that has a complete bounding box
    (plus a 1-based index label when the region has non-empty ``text``),
    groups nearby boxes with ``group_nearby_boxes`` and outlines every
    group of two or more boxes with a thicker colored rectangle. The result
    is PNG-encoded and written to ``output_image_path``.

    Args:
        temp_json_path: Path of the JSON file to read. Each entry of
            ``text_regions`` is looked up for a box under ``bbox`` or
            ``green_box_coordinates``; the box must provide ``x1``, ``y1``,
            ``x2`` and ``y2``. Entries without a complete box are skipped.
        output_image_path: Destination file. Missing parent directories are
            created. The data written is always PNG, whatever the suffix.
        merge_distance: Largest distance, in pixels, at which two boxes are
            placed in the same group. Defaults to 80.

    On any exception the error and a traceback are printed and the process
    exits with status 1.
    """
    try:
        # Make sure console output is encoded as UTF-8. Streams that were
        # replaced by objects without ``reconfigure`` (or are missing) are
        # left untouched instead of aborting the whole run.
        for stream in (sys.stdout, sys.stderr):
            reconfigure = getattr(stream, 'reconfigure', None)
            if reconfigure is not None:
                reconfigure(encoding='utf-8')

        print(f"[INFO] 读取文字区域数据: {temp_json_path}")
        with open(temp_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        original_image_path = data['image_path']
        text_regions = data['text_regions']

        print(f"[INFO] 读取原图片: {original_image_path}")

        # Read the raw bytes with numpy and decode them in memory so that
        # paths containing non-ASCII (e.g. Chinese) characters work.
        img_array = np.fromfile(str(original_image_path), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

        if img is None:
            raise ValueError(f"无法读取图片: {original_image_path}")

        print(f"[INFO] 图片尺寸: {img.shape[:2][::-1]} (宽x高)")
        print(f"[INFO] 文字区域数量: {len(text_regions)}")

        # Draw a green box around every text region with usable coordinates.
        valid_boxes = []
        for i, region in enumerate(text_regions):
            bbox = region.get('bbox') or region.get('green_box_coordinates')
            if not bbox or not all(k in bbox for k in ('x1', 'y1', 'x2', 'y2')):
                continue

            x1, y1 = int(bbox['x1']), int(bbox['y1'])
            x2, y2 = int(bbox['x2']), int(bbox['y2'])

            # Remember the box for the merge step below.
            valid_boxes.append((x1, y1, x2, y2))

            # Green rectangle; colors are BGR, so green is (0, 255, 0).
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Label regions that carry text with their 1-based index, placed
            # above the box but never higher than y=15.
            if region.get('text', ''):
                label_y = max(y1 - 5, 15)
                cv2.putText(img, f"{i+1}", (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # Group boxes by distance and outline each multi-box group.
        if valid_boxes:
            print(f"[INFO] 对 {len(valid_boxes)} 个文字区域进行距离分析...")
            print(f"[INFO] 合并距离阈值: {merge_distance} 像素")

            groups = group_nearby_boxes(valid_boxes, merge_distance)
            print(f"[INFO] 分组结果: {len(groups)} 个文字群组")

            # BGR colors cycled by group index; red comes first.
            colors = [(0, 0, 255), (255, 0, 0), (0, 255, 255), (255, 0, 255), (0, 165, 255)]

            for group_idx, group_boxes in enumerate(groups):
                # Only groups with two or more boxes get a merged outline.
                if len(group_boxes) < 2:
                    print(f"[INFO] 群组{group_idx+1}: {len(group_boxes)}个框(单独框,不合并)")
                    continue

                mx1, my1, mx2, my2 = calculate_merged_bbox(group_boxes)
                color = colors[group_idx % len(colors)]

                print(f"[INFO] 群组{group_idx+1}: {len(group_boxes)}个框 -> 合并坐标({mx1}, {my1}) 到 ({mx2}, {my2})")
                print(f"[INFO] 群组{group_idx+1}: 尺寸 {mx2-mx1}x{my2-my1}")

                # Thicker outline for the merged box.
                cv2.rectangle(img, (mx1, my1), (mx2, my2), color, 4)

                # cv2.putText with Hershey fonts cannot render non-ASCII
                # characters (they come out as '?'), so the label drawn on
                # the image is ASCII-only.
                label_text = f"Group {group_idx+1} ({len(group_boxes)} boxes)"
                label_y = max(my1 - 15, 25)
                cv2.putText(img, label_text, (mx1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            merged_groups = sum(1 for group in groups if len(group) >= 2)
            total_merged_boxes = sum(len(group) for group in groups if len(group) >= 2)

            print(f"[SUCCESS] 已绘制 {merged_groups} 个红色合并边框,覆盖 {total_merged_boxes} 个文字区域")

        # Save the annotated image.
        output_path = Path(output_image_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Encode in memory and write the bytes ourselves so that non-ASCII
        # output paths work.
        success, encoded_img = cv2.imencode('.png', img)
        if not success:
            raise ValueError("图片编码失败")
        with open(str(output_path), 'wb') as f:
            f.write(encoded_img.tobytes())
        print(f"[OK] 绿色框图片已保存: {output_path}")

        # Report the number of boxes actually drawn; regions without a
        # complete bounding box were skipped above.
        print(f"[SUCCESS] 成功绘制 {len(valid_boxes)} 个文字区域的绿色框")

    except Exception as e:
        # Top-level boundary of the script: report the failure and exit
        # with a non-zero status.
        print(f"[ERROR] 绘制绿色框失败: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
if __name__ == '__main__':
    # Script entry point: exactly two positional arguments are required,
    # the JSON input path and the output image path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python draw_text_regions.py <temp_json_path> <output_image_path>")
        sys.exit(1)

    json_path, image_path = cli_args
    draw_text_regions_with_green_boxes(json_path, image_path)
|