"""Draw detected text regions on an image and outline clusters of nearby regions.

Reads a JSON file describing text regions, draws a green rectangle around
every region, groups regions that lie close to each other, and draws a thick
coloured rectangle around every group that contains two or more regions.
"""
import json
import math
import sys
from pathlib import Path

import cv2
import numpy as np

# BGR colours cycled through for merged group outlines (red first).
_GROUP_COLORS = [
    (0, 0, 255),
    (255, 0, 0),
    (0, 255, 255),
    (255, 0, 255),
    (0, 165, 255),
]

_BBOX_KEYS = ('x1', 'y1', 'x2', 'y2')


def calculate_box_distance(box1, box2) -> float:
    """Return the shortest distance in pixels between two axis-aligned boxes.

    The distance is measured edge to edge: overlapping or touching boxes
    are at distance 0, boxes separated along one axis are separated by that
    gap, and boxes separated along both axes are separated by the Euclidean
    distance between their nearest corners.

    Args:
        box1: First box as ``(x1, y1, x2, y2)`` with ``x1 <= x2`` and
            ``y1 <= y2``.
        box2: Second box, same layout.

    Returns:
        The edge-to-edge distance as a float.
    """
    x1_a, y1_a, x2_a, y2_a = box1
    x1_b, y1_b, x2_b, y2_b = box2

    # For well-formed boxes at most one of the two differences per axis is
    # positive; clamping at zero covers the overlapping case.
    horizontal_gap = max(x1_b - x2_a, x1_a - x2_b, 0)
    vertical_gap = max(y1_b - y2_a, y1_a - y2_b, 0)

    return math.hypot(horizontal_gap, vertical_gap)


def group_nearby_boxes(boxes, max_distance=50):
    """Group boxes into clusters of transitively nearby boxes.

    Two boxes are neighbours when ``calculate_box_distance`` between them is
    at most ``max_distance``. Groups are the connected components of that
    neighbour relation, so two boxes may share a group through a chain of
    intermediate boxes even when they are far apart themselves.

    Args:
        boxes: List of boxes ``[(x1, y1, x2, y2), ...]``.
        max_distance: Maximum edge-to-edge distance (pixels) for two boxes
            to be considered neighbours.

    Returns:
        A list of groups; each group is a list of the original box tuples.
        Returns ``[]`` when ``boxes`` is empty.
    """
    if not boxes:
        return []

    n = len(boxes)
    adjacent = [[] for _ in range(n)]

    # Build the neighbour graph from all pairwise distances.
    for i in range(n):
        for j in range(i + 1, n):
            if calculate_box_distance(boxes[i], boxes[j]) <= max_distance:
                adjacent[i].append(j)
                adjacent[j].append(i)

    visited = [False] * n
    groups = []

    for start in range(n):
        if visited[start]:
            continue

        # Iterative depth-first search: an explicit stack avoids hitting the
        # interpreter recursion limit on long chains of neighbouring boxes.
        # Neighbours are pushed in reverse so they are visited in the same
        # order a recursive traversal would use.
        component = []
        stack = [start]
        while stack:
            node = stack.pop()
            if visited[node]:
                continue
            visited[node] = True
            component.append(node)
            stack.extend(
                neighbor
                for neighbor in reversed(adjacent[node])
                if not visited[neighbor]
            )

        groups.append([boxes[idx] for idx in component])

    return groups


def calculate_merged_bbox(boxes):
    """Return the smallest rectangle that encloses every box in ``boxes``.

    Args:
        boxes: List of boxes, each ``(x1, y1, x2, y2)``.

    Returns:
        ``(min_x1, min_y1, max_x2, max_y2)``, or ``None`` when ``boxes`` is
        empty.
    """
    if not boxes:
        return None

    return (
        min(box[0] for box in boxes),
        min(box[1] for box in boxes),
        max(box[2] for box in boxes),
        max(box[3] for box in boxes),
    )


def _reconfigure_stdio_utf8():
    """Switch stdout/stderr to UTF-8 when the streams support it."""
    for stream in (sys.stdout, sys.stderr):
        # Replaced or missing streams (e.g. a StringIO, or None under
        # pythonw) have no reconfigure(); skip them instead of failing.
        reconfigure = getattr(stream, 'reconfigure', None)
        if reconfigure is not None:
            reconfigure(encoding='utf-8')


def _extract_box(region):
    """Return the region's box as ``(x1, y1, x2, y2)`` ints, or ``None``."""
    bbox = region.get('bbox') or region.get('green_box_coordinates')
    if not bbox or not all(key in bbox for key in _BBOX_KEYS):
        return None

    # Sort each axis so that x1 <= x2 and y1 <= y2; the distance and merge
    # helpers rely on that ordering.
    x1, x2 = sorted((int(bbox['x1']), int(bbox['x2'])))
    y1, y2 = sorted((int(bbox['y1']), int(bbox['y2'])))
    return (x1, y1, x2, y2)


def _draw_region_boxes(img, text_regions):
    """Draw a green box per valid region on ``img``; return the drawn boxes."""
    valid_boxes = []
    for i, region in enumerate(text_regions):
        box = _extract_box(region)
        if box is None:
            continue

        x1, y1, x2, y2 = box
        valid_boxes.append(box)

        # OpenCV colours are BGR, so (0, 255, 0) is green.
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Number the region above its box, but only when it carries text;
        # clamp the baseline so the label stays inside the image.
        if region.get('text', ''):
            label_y = max(y1 - 5, 15)
            cv2.putText(img, f"{i+1}", (x1, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    return valid_boxes


def _draw_merged_groups(img, groups):
    """Outline every group of two or more boxes on ``img`` and log the result."""
    for group_idx, group_boxes in enumerate(groups):
        if len(group_boxes) < 2:
            print(f"[INFO] 群组{group_idx+1}: {len(group_boxes)}个框(单独框,不合并)")
            continue

        mx1, my1, mx2, my2 = calculate_merged_bbox(group_boxes)
        color = _GROUP_COLORS[group_idx % len(_GROUP_COLORS)]

        print(f"[INFO] 群组{group_idx+1}: {len(group_boxes)}个框 -> 合并坐标({mx1}, {my1}) 到 ({mx2}, {my2})")
        print(f"[INFO] 群组{group_idx+1}: 尺寸 {mx2-mx1}x{my2-my1}")

        # Thicker outline than the per-region boxes.
        cv2.rectangle(img, (mx1, my1), (mx2, my2), color, 4)

        # cv2.putText uses Hershey fonts, which cannot render non-ASCII
        # glyphs (they come out as '?'), so the on-image label is ASCII.
        label_text = f"Group {group_idx+1} ({len(group_boxes)} boxes)"
        label_y = max(my1 - 15, 25)
        cv2.putText(img, label_text, (mx1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    merged_groups = sum(1 for group in groups if len(group) >= 2)
    total_merged_boxes = sum(len(group) for group in groups if len(group) >= 2)
    print(f"[SUCCESS] 已绘制 {merged_groups} 个红色合并边框,覆盖 {total_merged_boxes} 个文字区域")


def _save_png(img, output_image_path):
    """Encode ``img`` as PNG and write it to ``output_image_path``."""
    output_path = Path(output_image_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Encode in memory and write the bytes with Python's own file API so
    # that paths containing non-ASCII characters work.
    success, encoded_img = cv2.imencode('.png', img)
    if not success:
        raise ValueError("图片编码失败")

    with open(str(output_path), 'wb') as f:
        f.write(encoded_img.tobytes())
    print(f"[OK] 绿色框图片已保存: {output_path}")


def draw_text_regions_with_green_boxes(temp_json_path, output_image_path,
                                       merge_distance=80):
    """Draw text-region boxes and merged group outlines, then save a PNG.

    The JSON file must contain ``image_path`` (the source image) and
    ``text_regions`` (a list of dicts). Each region supplies its box under
    ``bbox`` or ``green_box_coordinates`` as a mapping with keys ``x1``,
    ``y1``, ``x2``, ``y2``; regions without a complete box are skipped.

    Any failure is reported on stdout together with a traceback, and the
    process exits with status 1.

    Args:
        temp_json_path: Path of the JSON file describing the text regions.
        output_image_path: Destination path of the annotated PNG; missing
            parent directories are created.
        merge_distance: Maximum edge-to-edge distance in pixels at which
            two regions are placed in the same group.
    """
    try:
        _reconfigure_stdio_utf8()

        print(f"[INFO] 读取文字区域数据: {temp_json_path}")
        with open(temp_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        original_image_path = data['image_path']
        text_regions = data['text_regions']

        print(f"[INFO] 读取原图片: {original_image_path}")
        # Read the raw bytes with NumPy and decode in memory so that paths
        # containing non-ASCII characters work.
        img_array = np.fromfile(str(original_image_path), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError(f"无法读取图片: {original_image_path}")

        print(f"[INFO] 图片尺寸: {img.shape[:2][::-1]} (宽x高)")
        print(f"[INFO] 文字区域数量: {len(text_regions)}")

        valid_boxes = _draw_region_boxes(img, text_regions)

        if valid_boxes:
            print(f"[INFO] 对 {len(valid_boxes)} 个文字区域进行距离分析...")
            print(f"[INFO] 合并距离阈值: {merge_distance} 像素")

            groups = group_nearby_boxes(valid_boxes, merge_distance)
            print(f"[INFO] 分组结果: {len(groups)} 个文字群组")

            _draw_merged_groups(img, groups)

        _save_png(img, output_image_path)
        print(f"[SUCCESS] 成功绘制 {len(text_regions)} 个文字区域的绿色框")

    except Exception as e:
        # Top-level boundary of the script: report the failure and signal it
        # to the calling process through the exit status.
        print(f"[ERROR] 绘制绿色框失败: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("Usage: python draw_text_regions.py <temp_json_path> <output_image_path>")
        sys.exit(1)

    temp_json_file = sys.argv[1]
    output_img = sys.argv[2]
    draw_text_regions_with_green_boxes(temp_json_file, output_img)