# -*- coding: utf-8 -*-
# Repository: yichael/AIStoryBoard
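"""
Detect green frames in an image and crop the image along those frames.
Intended for images whose text regions are marked with green frames.
"""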

import sys
import json
import cv2
import numpy as np
from pathlib import Path

# Windows encoding fix: on win32, replace sys.stdout / sys.stderr with UTF-8
# text wrappers around the same underlying byte buffers. This script prints
# Chinese text and emoji; presumably the default console encoding cannot
# represent them. errors='replace' substitutes a placeholder for anything that
# still cannot be encoded instead of raising UnicodeEncodeError.
if sys.platform == 'win32':
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')


def detect_green_boxes(image_path, green_color_lower=(0, 200, 0), green_color_upper=(50, 255, 50)):
    """
    Detect green rectangular frames in an image (OpenCV only, no external data).

    Args:
        image_path: Path of the image that contains the green frames.
        green_color_lower: Lower bound of the green colour range (BGR order).
        green_color_upper: Upper bound of the green colour range (BGR order).

    Returns:
        A list of dicts, one per accepted frame, with the keys ``x1``, ``y1``,
        ``x2``, ``y2``, ``width``, ``height``, ``center_x``, ``center_y`` and
        ``area``. The list is ordered right-to-left by column band and
        top-to-bottom inside each band.

    Raises:
        ValueError: If the image cannot be decoded (including an empty file).
    """
    # Read the raw bytes and decode them in memory (handles Chinese paths).
    img_array = np.fromfile(str(image_path), dtype=np.uint8)
    # An empty file yields an empty buffer; report it as an unreadable image
    # instead of handing the empty buffer to the decoder.
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_array.size else None

    if img is None:
        raise ValueError(f"无法读取图片: {image_path}")

    print(f"[INFO] 读取图片: {Path(image_path).name}")
    print(f"[INFO] 图片尺寸: {img.shape[1]}x{img.shape[0]}")

    h, w = img.shape[:2]

    # Step 1: build a mask of the pixels that fall inside the green range.
    green_mask = cv2.inRange(img, np.array(green_color_lower), np.array(green_color_upper))

    # Step 2: morphological close + dilate with a 5x5 kernel to join broken
    # line segments into connected outlines.
    kernel = np.ones((5, 5), np.uint8)
    green_mask = cv2.morphologyEx(green_mask, cv2.MORPH_CLOSE, kernel, iterations=3)
    green_mask = cv2.dilate(green_mask, kernel, iterations=2)

    # Step 3: find the outer contours of the green regions.
    contours, _ = cv2.findContours(green_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    print(f"[INFO] 检测到 {len(contours)} 个绿色轮廓")

    # Step 4: turn contours into bounding boxes and filter out noise.
    # The minimum area depends only on the image size, so compute it once:
    # a box must cover at least 0.5% of the image.
    min_area = w * h * 0.005
    green_boxes = []
    for contour in contours:
        x, y, box_w, box_h = cv2.boundingRect(contour)
        bbox_area = box_w * box_h

        # Too small: probably noise.
        if bbox_area < min_area:
            continue

        # Too thin: a side shorter than 10 px is treated as a stray line,
        # not a frame.
        if box_w < 10 or box_h < 10:
            continue

        # Contour area vs. bounding-box area: a very low ratio indicates an
        # irregular shape (noise or a broken line). bbox_area is >= 100 here
        # because both sides are at least 10 px, so the division is safe.
        if cv2.contourArea(contour) / bbox_area < 0.1:
            continue

        green_boxes.append({
            'x1': int(x),
            'y1': int(y),
            'x2': int(x + box_w),
            'y2': int(y + box_h),
            'width': int(box_w),
            'height': int(box_h),
            'center_x': float(x + box_w / 2),
            'center_y': float(y + box_h / 2),
            'area': int(bbox_area)
        })

    print(f"[INFO] 过滤后剩余 {len(green_boxes)} 个绿色框")

    # Step 5: order the boxes.
    #   1. Primary: column band, rightmost band first.
    #   2. Secondary: vertical position inside a band, topmost first.
    # A band is 20% of the image width, but never narrower than 80 px.
    x_threshold = max(w * 0.2, 80)

    def column_index(box):
        """Return the index of the x_threshold-wide band holding the box centre."""
        return int(box['center_x'] // x_threshold)

    def sort_key(box):
        """Sort key: bands right-to-left (negated index), then top-to-bottom."""
        # NOTE(review): bands are fixed-width buckets, so two boxes whose
        # centres are close but lie on opposite sides of a bucket boundary
        # end up in different bands. Kept as-is to preserve the output order.
        return (-column_index(box), box['center_y'])

    green_boxes.sort(key=sort_key)

    # Print the final order (debug aid).
    print(f"[DEBUG] 排序后的绿色框顺序 (x_threshold={x_threshold:.1f}):")
    for i, box in enumerate(green_boxes, 1):
        x_quantized = column_index(box)
        print(f"  [{i}] center_x={box['center_x']:.1f}, center_y={box['center_y']:.1f}, x_quantized={x_quantized}, 位置=({box['x1']},{box['y1']}) -> ({box['x2']},{box['y2']})")

    return green_boxes


def cut_dialog_blocks_by_green_box(image_path, output_dir, padding=5, line_thickness=3,
                                   green_color_lower=(0, 200, 0), green_color_upper=(50, 255, 50)):
    """
    Detect the green frames in an image and save the content of each frame.

    Args:
        image_path: Path of the image that contains the green frames.
        output_dir: Directory the cropped images are written to (created if
            missing, also when no frame is detected).
        padding: Margin in pixels added around each crop.
        line_thickness: Estimated thickness in pixels of the green frame
            line; each box is shrunk by this amount before padding is added.
        green_color_lower: Lower bound of the green colour range (BGR order),
            used both for detection and for blanking leftover green pixels.
        green_color_upper: Upper bound of the green colour range (BGR order).

    Returns:
        A list of dicts describing the saved files, each with the keys
        ``group_index``, ``file_path``, ``file_name``, ``bbox`` and
        ``blocks_count``. Empty when no frame is detected.

    Raises:
        ValueError: If the image cannot be decoded.
    """
    # Detect the frames with the same colour range that is used further down
    # to blank out leftover green pixels.
    green_boxes = detect_green_boxes(
        image_path,
        green_color_lower=green_color_lower,
        green_color_upper=green_color_upper,
    )

    # Make sure the output directory exists before any early return, so a
    # caller can write into it even when nothing was cropped.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    if not green_boxes:
        print("[WARN] 未检测到任何绿色框")
        return []

    # Read the original image again for cropping (handles Chinese paths).
    img_array = np.fromfile(str(image_path), dtype=np.uint8)
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

    if img is None:
        raise ValueError(f"无法读取图片: {image_path}")

    img_height, img_width = img.shape[:2]

    # Raw (non-dilated) green mask, used to blank green pixels in each crop.
    green_mask = cv2.inRange(img, np.array(green_color_lower), np.array(green_color_upper))

    # Crop and save the content of every frame.
    saved_files = []
    print(f"[DEBUG] 开始保存文件，共 {len(green_boxes)} 个框:")
    for i, box in enumerate(green_boxes):
        print(f"[DEBUG] 处理框 {i+1}: center_x={box['center_x']:.1f}, center_y={box['center_y']:.1f}, 位置=({box['x1']},{box['y1']}) -> ({box['x2']},{box['y2']})")
        # Step 1: shrink the box by the line thickness to step inside the
        # green frame line.
        x1 = max(0, box['x1'] + line_thickness)
        y1 = max(0, box['y1'] + line_thickness)
        x2 = min(img_width, box['x2'] - line_thickness)
        y2 = min(img_height, box['y2'] - line_thickness)

        # Step 2: grow it again by the padding, clamped to the image bounds.
        # When padding exceeds line_thickness the crop reaches past the
        # detected box; leftover green pixels are blanked in step 4.
        x1 = max(0, x1 - padding)
        y1 = max(0, y1 - padding)
        x2 = min(img_width, x2 + padding)
        y2 = min(img_height, y2 + padding)

        if x2 <= x1 or y2 <= y1:
            print(f"  [WARN] 绿色框 {i + 1} 裁剪区域无效，跳过")
            continue

        # Step 3: crop a copy so the source image stays untouched.
        crop = img[y1:y2, x1:x2].copy()

        # Step 4: paint every pixel that matches the green range white (BGR).
        crop_green_mask = green_mask[y1:y2, x1:x2]
        crop[crop_green_mask > 0] = [255, 255, 255]

        # Save as dialog_{i}.png (per the original comment, the name format
        # the Node.js side expects). Encoding in memory and writing with
        # tofile mirrors the fromfile/imdecode read path.
        block_filename = f"dialog_{i + 1}.png"
        block_path = output_dir / block_filename

        success, encoded_img = cv2.imencode('.png', crop)
        if success:
            encoded_img.tofile(str(block_path))
            saved_files.append({
                'group_index': i + 1,
                'file_path': str(block_path),
                'file_name': block_filename,
                'bbox': {
                    'x1': x1,
                    'y1': y1,
                    'x2': x2,
                    'y2': y2
                },
                'blocks_count': 1
            })
            # Detailed debug line including the centre of the source box.
            print(f"  [{i + 1}/{len(green_boxes)}] ✅ 已保存: {block_filename} (区域: {x1},{y1} -> {x2},{y2}, center_x={box['center_x']:.1f}, center_y={box['center_y']:.1f})")
        else:
            print(f"  [ERROR] 绿色框 {i + 1} 保存失败")

    print(f"\n✅ 成功裁剪并保存 {len(saved_files)} 个文字块")
    return saved_files


# Command-line entry point:
#   python cut_dialog_blocks_by_green_box.py <image_path> <output_dir> [padding]
# Crops every detected green frame into <output_dir> and writes a JSON summary
# (cut_blocks_result.json) next to the crops.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        print("用法: python cut_dialog_blocks_by_green_box.py <image_path> <output_dir> [padding]")
        print("示例: python cut_dialog_blocks_by_green_box.py image.png output_dir 5")
        sys.exit(1)

    image_path = sys.argv[1]
    output_dir = sys.argv[2]
    # Optional third argument: crop margin in pixels (default 5).
    padding = int(sys.argv[3]) if len(sys.argv) > 3 else 5

    saved_files = cut_dialog_blocks_by_green_box(image_path, output_dir, padding)

    # Summary written as JSON.
    result = {
        'image_path': image_path,
        'output_dir': output_dir,
        'total_blocks': len(saved_files),
        'saved_files': saved_files
    }

    result_json_path = Path(output_dir) / 'cut_blocks_result.json'
    # Do not rely on the cutter having created the directory (for example
    # when it finds no frames); without this the open() below would fail
    # with FileNotFoundError for a directory that does not exist yet.
    result_json_path.parent.mkdir(parents=True, exist_ok=True)
    with open(result_json_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    print(f"\n✅ 结果已保存到: {result_json_path}")