yichael
/
AIStoryBoard


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
							# -*- coding: utf-8 -*-
"""
在格子内检测并切割文字区域
"""

import sys
import cv2
import numpy as np
from pathlib import Path

# Windows encoding fix: re-wrap the standard output/error byte streams in
# UTF-8 text wrappers so characters the console code page cannot encode are
# replaced instead of raising.
if sys.platform == 'win32':
    import io
    for _stream_name in ('stdout', 'stderr'):
        setattr(
            sys,
            _stream_name,
            io.TextIOWrapper(
                getattr(sys, _stream_name).buffer,
                encoding='utf-8',
                errors='replace',
            ),
        )
    del _stream_name


def _find_text_regions(img, min_area_ratio, max_area_ratio, min_side):
    """Return (x, y, w, h) boxes of candidate text regions in a BGR image, sorted for output."""
    img_height, img_width = img.shape[:2]

    # The caller decodes with IMREAD_COLOR, so the image always has 3 channels.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Adaptive threshold, inverted so dark strokes become white foreground.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )

    # Morphological close + dilate to merge neighbouring strokes into blobs.
    kernel = np.ones((3, 3), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)
    binary = cv2.dilate(binary, kernel, iterations=1)

    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    panel_area = img_width * img_height
    min_area = panel_area * min_area_ratio
    max_area = panel_area * max_area_ratio

    boxes = []
    for contour in contours:
        # Both area bounds are exclusive.
        if not min_area < cv2.contourArea(contour) < max_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        if w > min_side and h > min_side:
            boxes.append((x, y, w, h))

    # Right-most centre first, then top-most centre first.
    # NOTE(review): centre_x is a float, so centre_y only breaks exact ties;
    # regions in the same visual column are not grouped. Confirm this is the
    # intended reading order.
    boxes.sort(key=lambda b: (-(b[0] + b[2] / 2), b[1] + b[3] / 2))
    return boxes


def cut_text_regions_in_panel(panel_image_path, output_dir, *,
                              min_area_ratio=0.001, max_area_ratio=0.5,
                              min_side=10):
    """
    Detect text regions inside a panel image and save each one as a PNG crop.

    Crops are written to ``output_dir`` as ``<panel stem>_text<N>.png`` with
    N starting at 1, in the sorted region order (right-most region first).

    Args:
        panel_image_path: Path to the panel image.
        output_dir: Directory for the crops; created if it does not exist.
        min_area_ratio: Contours whose area is not strictly greater than this
            fraction of the panel area are discarded (default 0.1%).
        max_area_ratio: Contours whose area is not strictly smaller than this
            fraction of the panel area are discarded (default 50%).
        min_side: Bounding boxes must be strictly wider and taller than this
            many pixels (default 10).

    Returns:
        List of the written file paths, as strings.

    Raises:
        FileNotFoundError: If the panel image does not exist.
        ValueError: If the file is empty or cannot be decoded as an image.
    """
    panel_image_path = Path(panel_image_path)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Read the bytes with NumPy and decode in memory; the write side below
    # does the same in reverse so that non-ASCII (e.g. Chinese) paths work.
    img_array = np.fromfile(str(panel_image_path), dtype=np.uint8)

    # An empty buffer makes cv2.imdecode fail with an opaque assertion, so
    # treat a zero-byte file like any other undecodable image.
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_array.size else None
    if img is None:
        raise ValueError(f"无法读取图片: {panel_image_path}")

    regions = _find_text_regions(img, min_area_ratio, max_area_ratio, min_side)

    panel_name = panel_image_path.stem
    cut_files = []

    for idx, (x, y, w, h) in enumerate(regions, 1):
        # Bounding rects come from contours of this same image, so they lie
        # inside it and the slice needs no clamping.
        text_roi = img[y:y + h, x:x + w]
        output_path = output_dir / f"{panel_name}_text{idx}.png"

        # Best effort: a region that fails to encode is skipped, not fatal.
        success, encoded_img = cv2.imencode('.png', text_roi)
        if success:
            encoded_img.tofile(str(output_path))
            cut_files.append(str(output_path))

    return cut_files


if __name__ == '__main__':
    # CLI entry point: needs a panel image path and an output directory.
    if len(sys.argv) >= 3:
        panel_image_path, output_dir = sys.argv[1], sys.argv[2]
    else:
        print("用法: python cut_text_regions_in_panel.py <格子图片路径> <输出目录>")
        sys.exit(1)

    try:
        cut_files = cut_text_regions_in_panel(panel_image_path, output_dir)
        # Kept inside the try so a failure while printing is reported below too.
        print(f"✅ 成功切割 {len(cut_files)} 个文字区域")
    except Exception as e:
        # Top-level boundary: report the message and traceback, exit non-zero.
        print(f"[ERROR] 切割失败: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)