# yichael/AIStoryBoard
import json
import math
import sys
from pathlib import Path

import cv2
import numpy as np

def calculate_box_distance(box1, box2):
    """
    Compute the shortest edge-to-edge distance between two bounding boxes.

    The horizontal and vertical gaps between the boxes are measured
    independently (a gap is 0 when the boxes overlap or touch on that axis)
    and combined into a Euclidean distance.

    Args:
        box1, box2: Bounding boxes as (x1, y1, x2, y2).

    Returns:
        The shortest distance in pixels as a float; 0.0 when the boxes
        overlap or touch on both axes.
    """
    x1_1, y1_1, x2_1, y2_1 = box1
    x1_2, y1_2, x2_2, y2_2 = box2

    # Horizontal gap: stays 0 when the x-ranges overlap.
    horizontal_gap = 0
    if x2_1 < x1_2:  # box1 lies entirely to the left of box2
        horizontal_gap = x1_2 - x2_1
    elif x2_2 < x1_1:  # box2 lies entirely to the left of box1
        horizontal_gap = x1_1 - x2_2

    # Vertical gap: stays 0 when the y-ranges overlap.
    vertical_gap = 0
    if y2_1 < y1_2:  # box1 lies entirely above box2
        vertical_gap = y1_2 - y2_1
    elif y2_2 < y1_1:  # box2 lies entirely above box1
        vertical_gap = y1_1 - y2_2

    # Euclidean combination of the two axis gaps.
    return math.hypot(horizontal_gap, vertical_gap)

def group_nearby_boxes(boxes, max_distance=50):
    """
    Group bounding boxes that are close to each other.

    Two boxes are linked when calculate_box_distance() between them is at
    most max_distance. Groups are the connected components of that link
    graph, so boxes may end up in the same group through a chain of
    neighbors even if they are far apart themselves.

    Args:
        boxes: List of bounding boxes [(x1, y1, x2, y2), ...].
        max_distance: Maximum distance (pixels) at which two boxes are linked.

    Returns:
        A list of groups, each group being a list of the original boxes.
        Returns an empty list when boxes is empty.
    """
    if not boxes:
        return []

    # Build an adjacency list over box indices.
    n = len(boxes)
    adjacent = [[] for _ in range(n)]

    # Link every pair of boxes that lies within the distance threshold.
    for i in range(n):
        for j in range(i + 1, n):
            distance = calculate_box_distance(boxes[i], boxes[j])
            if distance <= max_distance:
                adjacent[i].append(j)
                adjacent[j].append(i)

    visited = [False] * n
    groups = []

    for start in range(n):
        if visited[start]:
            continue

        # Depth-first traversal with an explicit stack, so a long chain of
        # neighboring boxes cannot exceed the interpreter's recursion limit.
        group = []
        stack = [start]
        while stack:
            node = stack.pop()
            if visited[node]:
                continue
            visited[node] = True
            group.append(node)
            # Push neighbors in reverse so they are popped in adjacency
            # order, matching the visiting order of a recursive DFS.
            for neighbor in reversed(adjacent[node]):
                if not visited[neighbor]:
                    stack.append(neighbor)

        groups.append([boxes[idx] for idx in group])

    return groups

def calculate_merged_bbox(boxes):
    """
    Return the smallest rectangle that encloses every given bounding box.

    Args:
        boxes: List of bounding boxes, each indexable as (x1, y1, x2, y2).

    Returns:
        The enclosing box as (min_x1, min_y1, max_x2, max_y2), or None when
        boxes is empty.
    """
    if not boxes:
        return None

    # Collect each coordinate column, then take its extreme value.
    lefts = [entry[0] for entry in boxes]
    tops = [entry[1] for entry in boxes]
    rights = [entry[2] for entry in boxes]
    bottoms = [entry[3] for entry in boxes]

    return (min(lefts), min(tops), max(rights), max(bottoms))

def draw_text_regions_with_green_boxes(temp_json_path, output_image_path, merge_distance=80):
    """
    Draw text-region boxes and merged group boxes onto an image and save it.

    Reads a JSON file containing 'image_path' and 'text_regions', draws a
    green rectangle for every region that carries a complete bounding box,
    groups nearby boxes with group_nearby_boxes(), draws a thick colored
    rectangle around every group of two or more boxes, and writes the result
    as PNG-encoded bytes to output_image_path.

    Args:
        temp_json_path: Path to the JSON file describing the text regions.
            Each region may provide its box under 'bbox' or
            'green_box_coordinates' as a mapping with keys x1, y1, x2, y2.
        output_image_path: Destination path of the annotated image; parent
            directories are created when missing.
        merge_distance: Maximum distance in pixels at which two boxes are
            placed in the same group. Defaults to 80.

    Returns:
        None. On any failure the error and traceback are printed and the
        process exits with status 1.
    """
    try:
        # Force UTF-8 console output where the stream supports it; replaced
        # streams may not provide reconfigure().
        for stream in (sys.stdout, sys.stderr):
            reconfigure = getattr(stream, 'reconfigure', None)
            if reconfigure is not None:
                reconfigure(encoding='utf-8')

        print(f"[INFO] 读取文字区域数据: {temp_json_path}")
        with open(temp_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        original_image_path = data['image_path']
        text_regions = data['text_regions']

        print(f"[INFO] 读取原图片: {original_image_path}")

        # Read raw bytes and decode in memory so that non-ASCII paths work.
        img_array = np.fromfile(str(original_image_path), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

        if img is None:
            raise ValueError(f"无法读取图片: {original_image_path}")

        print(f"[INFO] 图片尺寸: {img.shape[:2][::-1]} (宽x高)")
        print(f"[INFO] 文字区域数量: {len(text_regions)}")

        # Draw a green box for every region that has a complete bbox.
        valid_boxes = []
        for i, region in enumerate(text_regions):
            bbox = region.get('bbox') or region.get('green_box_coordinates')
            text = region.get('text', '')

            if bbox and all(k in bbox for k in ['x1', 'y1', 'x2', 'y2']):
                x1, y1 = int(bbox['x1']), int(bbox['y1'])
                x2, y2 = int(bbox['x2']), int(bbox['y2'])

                # Remember the box for the grouping step below.
                valid_boxes.append((x1, y1, x2, y2))

                # Colors are BGR, so (0, 255, 0) is green.
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Label regions that have text with their 1-based index,
                # placed above the box but never above the image's top edge.
                if text:
                    label_y = max(y1 - 5, 15)
                    cv2.putText(img, f"{i+1}", (x1, label_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # Group nearby boxes and draw one enclosing box per group.
        if valid_boxes:
            print(f"[INFO] 对 {len(valid_boxes)} 个文字区域进行距离分析...")
            print(f"[INFO] 合并距离阈值: {merge_distance} 像素")

            groups = group_nearby_boxes(valid_boxes, merge_distance)
            print(f"[INFO] 分组结果: {len(groups)} 个文字群组")

            # Red first, then fallback colors (BGR) cycled by group index.
            colors = [(0, 0, 255), (255, 0, 0), (0, 255, 255), (255, 0, 255), (0, 165, 255)]

            for group_idx, group_boxes in enumerate(groups):
                # Only groups with two or more boxes get a merged box.
                if len(group_boxes) < 2:
                    print(f"[INFO] 群组{group_idx+1}: {len(group_boxes)}个框（单独框，不合并）")
                    continue

                merged_bbox = calculate_merged_bbox(group_boxes)
                if not merged_bbox:
                    continue

                mx1, my1, mx2, my2 = merged_bbox
                color = colors[group_idx % len(colors)]

                print(f"[INFO] 群组{group_idx+1}: {len(group_boxes)}个框 -> 合并坐标({mx1}, {my1}) 到 ({mx2}, {my2})")
                print(f"[INFO] 群组{group_idx+1}: 尺寸 {mx2-mx1}x{my2-my1}")

                # Thicker outline to distinguish merged boxes.
                cv2.rectangle(img, (mx1, my1), (mx2, my2), color, 4)

                # OpenCV's Hershey fonts only cover ASCII; non-ASCII glyphs
                # are drawn as '?', so the on-image label is kept ASCII.
                label_text = f"Group {group_idx+1} ({len(group_boxes)} boxes)"
                label_y = max(my1 - 15, 25)
                cv2.putText(img, label_text, (mx1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            merged_groups = sum(1 for group in groups if len(group) >= 2)
            total_merged_boxes = sum(len(group) for group in groups if len(group) >= 2)

            print(f"[SUCCESS] 已绘制 {merged_groups} 个红色合并边框，覆盖 {total_merged_boxes} 个文字区域")

        # Save the annotated image.
        output_path = Path(output_image_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Encode in memory and write the bytes ourselves so that non-ASCII
        # output paths work.
        success, encoded_img = cv2.imencode('.png', img)
        if success:
            with open(str(output_path), 'wb') as f:
                f.write(encoded_img.tobytes())
            print(f"[OK] 绿色框图片已保存: {output_path}")
        else:
            raise ValueError("图片编码失败")

        # Report the boxes actually drawn; regions without a complete bbox
        # were skipped above.
        print(f"[SUCCESS] 成功绘制 {len(valid_boxes)} 个文字区域的绿色框")

    except Exception as e:
        print(f"[ERROR] 绘制绿色框失败: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

if __name__ == '__main__':
    # Command-line entry point: expects exactly two positional arguments,
    # the region JSON path and the output image path.
    cli_args = sys.argv[1:]
    if len(cli_args) == 2:
        temp_json_file, output_img = cli_args
        draw_text_regions_with_green_boxes(temp_json_file, output_img)
    else:
        print("Usage: python draw_text_regions.py <temp_json_path> <output_image_path>")
        sys.exit(1)