# Source repository: yichael/AIStoryBoard
# -*- coding: utf-8 -*-
"""
使用opencv检测漫画格子（分镜框）
"""

import sys
import json
from pathlib import Path
import cv2
import numpy as np

# Windows encoding fix: re-wrap stdout and stderr as UTF-8 text streams,
# substituting a replacement marker for characters that cannot be encoded.
if sys.platform == 'win32':
    import io

    for _stream_name in ('stdout', 'stderr'):
        _raw_buffer = getattr(sys, _stream_name).buffer
        setattr(sys, _stream_name,
                io.TextIOWrapper(_raw_buffer, encoding='utf-8', errors='replace'))
    # Keep the module namespace free of the loop temporaries.
    del _stream_name, _raw_buffer


def validate_panel(gray, panel, border_width=10):
    """
    Decide whether a candidate rectangle looks like a comic panel.

    A panel is expected to contain artwork inside and to be surrounded by
    (mostly) white gutter outside.

    Args:
        gray: 2-D grayscale image.
        panel: dict with the keys 'x', 'y', 'width' and 'height'.
        border_width: thickness in pixels of the strips inspected just
            outside each side of the rectangle.

    Returns:
        bool: True when the candidate is accepted, False otherwise.
    """
    img_rows, img_cols = gray.shape

    # Clamp the origin into the image, then trim the size so the rectangle
    # never extends past the right/bottom edge.
    left = max(0, min(img_cols - 1, panel['x']))
    top = max(0, min(img_rows - 1, panel['y']))
    box_w = min(panel['width'], img_cols - left)
    box_h = min(panel['height'], img_rows - top)
    if box_w <= 0 or box_h <= 0:
        return False

    right = left + box_w
    bottom = top + box_h

    # --- Interior check -------------------------------------------------
    # Shrink the rectangle slightly so the frame line itself is not sampled.
    inset = max(2, int(min(box_w, box_h) * 0.05))
    if right - inset <= left + inset or bottom - inset <= top + inset:
        return False

    interior = gray[top + inset:bottom - inset, left + inset:right - inset]
    # Very bright and nearly uniform means blank paper, i.e. no artwork.
    if np.mean(interior) > 250 and np.std(interior) < 5:
        return False

    # --- Exterior check -------------------------------------------------
    # Collect the strip on each side that fits completely inside the image.
    strips = []
    if top >= border_width:
        strips.append(gray[top - border_width:top, left:right])
    if bottom + border_width <= img_rows:
        strips.append(gray[bottom:bottom + border_width, left:right])
    if left >= border_width:
        strips.append(gray[top:bottom, left - border_width:left])
    if right + border_width <= img_cols:
        strips.append(gray[top:bottom, right:right + border_width])
    strips = [strip for strip in strips if strip.size > 0]

    box_area = box_w * box_h
    img_area = img_cols * img_rows

    # Nothing to inspect outside (rectangle hugs the image edges): accept
    # only when it covers more than 5% of the image.
    if not strips:
        return bool(box_area > img_area * 0.05)

    # A strip counts as white when it is bright (mean > 200) and fairly
    # uniform (std < 30); at least half of the strips must be white.
    white_strips = sum(
        1 for strip in strips
        if np.mean(strip) > 200 and np.std(strip) < 30
    )
    if white_strips >= len(strips) * 0.5:
        return True

    # Otherwise a rectangle covering more than 2% of the image is still
    # accepted.
    return bool(box_area > img_area * 0.02)


def detect_panels_from_white_borders(img):
    """
    Detect panels by relying on the outermost area of a comic page being white.

    Strategy:
    1. Measure whether the four image edges are white (reported in the debug log).
    2. A row holds at most two panels; find the boundary lines of each panel first.
    3. Combine them with the perpendicular line segments to form complete panels.

    Args:
        img: input image (BGR; a single-channel image is used as-is).

    Returns:
        panel_mask: mask image with a white (255) background and black (0)
            panel outlines.
        panels: list of panel dicts with the keys x, y, width, height, area,
            center_x and center_y.
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()

    im_h, im_w = gray.shape
    img_area = im_h * im_w

    # 1. Probe the image edges for white margins.
    # Each of the four edge strips (top, bottom, left, right) counts as white
    # when its mean intensity exceeds 240. The flags are only reported by the
    # debug print below; nothing later in this function branches on them.
    border_width = max(10, int(min(im_w, im_h) * 0.02))

    # Top edge
    top_region = gray[0:border_width, :]
    top_white = np.mean(top_region) > 240

    # Bottom edge
    bottom_region = gray[im_h-border_width:im_h, :]
    bottom_white = np.mean(bottom_region) > 240

    # Left edge
    left_region = gray[:, 0:border_width]
    left_white = np.mean(left_region) > 240

    # Right edge
    right_region = gray[:, im_w-border_width:im_w]
    right_white = np.mean(right_region) > 240

    print(f"[DEBUG] 边缘白色检测: 上={top_white}, 下={bottom_white}, 左={left_white}, 右={right_white}")

    # 2. Detect horizontal and vertical lines (panel boundary lines)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Adaptive threshold (inverted, so dark strokes become foreground)
    adaptive_thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )

    # Canny edge detection
    edges = cv2.Canny(blurred, 30, 100, apertureSize=3)

    # Union of both edge maps
    combined_edges = cv2.bitwise_or(edges, adaptive_thresh)

    # Horizontal lines (used to split the page into rows): morphological
    # opening with a wide 1-pixel-high kernel, then dilation with the same kernel.
    h_kernel_size = max(int(im_w * 0.05), 30)
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (h_kernel_size, 1))
    horizontal_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, horizontal_kernel)
    horizontal_lines = cv2.dilate(horizontal_lines, horizontal_kernel, iterations=2)

    # Vertical lines (used to split rows into columns; at most two panels per row)
    v_kernel_size = max(int(im_h * 0.05), 30)
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_kernel_size))
    vertical_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, vertical_kernel)
    vertical_lines = cv2.dilate(vertical_lines, vertical_kernel, iterations=2)

    # 3. Probabilistic Hough transform to find the main horizontal/vertical lines
    min_line_length = max(int(min(im_w, im_h) * 0.15), 50)  # raised minimum length

    # Horizontal lines (high threshold so only the main separators are found)
    h_lines = cv2.HoughLinesP(
        horizontal_lines,
        1,
        np.pi/180,
        threshold=max(int(min(im_w, im_h) * 0.25), 80),  # raised threshold
        minLineLength=min_line_length,
        maxLineGap=max(int(min(im_w, im_h) * 0.03), 10)
    )

    # Vertical lines (high threshold so only the main separators are found)
    v_lines = cv2.HoughLinesP(
        vertical_lines,
        1,
        np.pi/180,
        threshold=max(int(min(im_w, im_h) * 0.25), 80),  # raised threshold
        minLineLength=min_line_length,
        maxLineGap=max(int(min(im_w, im_h) * 0.03), 10)
    )

    # 4. Merge nearby horizontal/vertical lines into the main separator lines
    def merge_lines(lines, is_horizontal=True):
        """Merge nearby lines into one and drop merged lines that are too short.

        Each not-yet-used line seeds a group; every later unused line whose
        midpoint coordinate lies within the merge threshold of the seed joins
        that group. Returns a list of (x1, y1, x2, y2) tuples.
        """
        if lines is None or len(lines) == 0:
            return []

        merged = []
        used = set()

        # Merge thresholds scale with the image size
        merge_threshold_h = max(im_h * 0.03, 30)  # threshold for horizontal lines
        merge_threshold_v = max(im_w * 0.03, 30)  # threshold for vertical lines

        for i, line in enumerate(lines):
            if i in used:
                continue

            x1, y1, x2, y2 = line[0]

            if is_horizontal:
                # Horizontal: group lines whose Y coordinate is close to the seed
                y = (y1 + y2) / 2
                similar_lines = [line]
                used.add(i)

                for j, other_line in enumerate(lines[i+1:], i+1):
                    if j in used:
                        continue
                    ox1, oy1, ox2, oy2 = other_line[0]
                    oy = (oy1 + oy2) / 2

                    # Close enough in Y (within the threshold): merge
                    if abs(y - oy) < merge_threshold_h:
                        similar_lines.append(other_line)
                        used.add(j)

                # Merged line: mean Y of the group and its overall X extent
                avg_y = int(np.mean([(l[0][1] + l[0][3]) / 2 for l in similar_lines]))
                min_x = int(min([min(l[0][0], l[0][2]) for l in similar_lines]))
                max_x = int(max([max(l[0][0], l[0][2]) for l in similar_lines]))

                # Keep only horizontal lines spanning more than 50% of the image width
                if (max_x - min_x) > im_w * 0.5:
                    merged.append((min_x, avg_y, max_x, avg_y))
            else:
                # Vertical: group lines whose X coordinate is close to the seed
                x = (x1 + x2) / 2
                similar_lines = [line]
                used.add(i)

                for j, other_line in enumerate(lines[i+1:], i+1):
                    if j in used:
                        continue
                    ox1, oy1, ox2, oy2 = other_line[0]
                    ox = (ox1 + ox2) / 2

                    # Close enough in X (within the threshold): merge
                    if abs(x - ox) < merge_threshold_v:
                        similar_lines.append(other_line)
                        used.add(j)

                # Merged line: mean X of the group and its overall Y extent
                avg_x = int(np.mean([(l[0][0] + l[0][2]) / 2 for l in similar_lines]))
                min_y = int(min([min(l[0][1], l[0][3]) for l in similar_lines]))
                max_y = int(max([max(l[0][1], l[0][3]) for l in similar_lines]))

                # Keep only vertical lines spanning more than 50% of the image height
                if (max_y - min_y) > im_h * 0.5:
                    merged.append((avg_x, min_y, avg_x, max_y))

        return merged

    h_merged = merge_lines(h_lines, is_horizontal=True)
    v_merged = merge_lines(v_lines, is_horizontal=False)

    print(f"[DEBUG] 检测到 {len(h_merged)} 条水平线和 {len(v_merged)} 条垂直线")

    # 5. Build the grid coordinates from the merged lines,
    # adding the image borders as outermost separators
    h_coords = [0]  # top border
    for line in h_merged:
        y = line[1]
        if 0 < y < im_h:
            h_coords.append(y)
    h_coords.append(im_h)  # bottom border
    h_coords = sorted(set(h_coords))

    v_coords = [0]  # left border
    for line in v_merged:
        x = line[0]
        if 0 < x < im_w:
            v_coords.append(x)
    v_coords.append(im_w)  # right border
    v_coords = sorted(set(v_coords))

    print(f"[DEBUG] 水平分割线Y坐标: {h_coords}")
    print(f"[DEBUG] 垂直分割线X坐标: {v_coords}")

    # 6. Build the panels (at most two per row)
    panels = []
    min_panel_area = img_area * 0.02

    # Drop horizontal separators that are too close to the previously kept
    # one (probably duplicate detections). The first and last coordinates
    # (image borders) are always kept.
    filtered_h_coords = [h_coords[0]]
    for i in range(1, len(h_coords) - 1):
        if h_coords[i] - filtered_h_coords[-1] > im_h * 0.05:  # at least 5% of the height apart
            filtered_h_coords.append(h_coords[i])
    filtered_h_coords.append(h_coords[-1])

    # Same de-duplication for the vertical separators
    filtered_v_coords = [v_coords[0]]
    for i in range(1, len(v_coords) - 1):
        if v_coords[i] - filtered_v_coords[-1] > im_w * 0.05:  # at least 5% of the width apart
            filtered_v_coords.append(v_coords[i])
    filtered_v_coords.append(v_coords[-1])

    print(f"[DEBUG] 过滤后: {len(filtered_h_coords)} 条水平分割线, {len(filtered_v_coords)} 条垂直分割线")

    # Build panels from the filtered coordinates, one row at a time
    for i in range(len(filtered_h_coords) - 1):
        y1 = filtered_h_coords[i]
        y2 = filtered_h_coords[i + 1]

        # At most two panels per row:
        # find the vertical separators that apply to this row
        row_v_coords = [filtered_v_coords[0]]  # left boundary of the row

        for v_x in filtered_v_coords[1:-1]:  # skip the left/right image borders
            # A vertical separator applies to this row when the 20-pixel-wide
            # band around it holds enough edge pixels within the row.
            line_region = combined_edges[y1:y2, max(0, v_x-10):min(im_w, v_x+10)]
            if np.sum(line_region > 0) > (y2 - y1) * 0.2:  # edge pixel count above 20% of the row height
                row_v_coords.append(v_x)

        row_v_coords.append(filtered_v_coords[-1])  # right boundary of the row

        # Too many separators in one row: keep only left, middle and right
        # (at most two panels per row means at most three vertical lines)
        if len(row_v_coords) > 3:
            # Pick the lines closest to the left, centre and right positions
            left = row_v_coords[0]
            right = row_v_coords[-1]
            mid = (left + right) / 2

            # Interior separator closest to the centre of the row
            closest_mid = min(row_v_coords[1:-1], key=lambda x: abs(x - mid))
            row_v_coords = [left, closest_mid, right]

        # Create the panels between consecutive vertical separators
        for j in range(len(row_v_coords) - 1):
            x1 = row_v_coords[j]
            x2 = row_v_coords[j + 1]

            # Basic size filtering
            w = x2 - x1
            h = y2 - y1
            area = w * h

            # Reject panels whose area, height or width is too small
            min_height = im_h * 0.08  # at least 8% of the image height
            min_width = im_w * 0.15   # at least 15% of the image width

            if area < min_panel_area or h < min_height or w < min_width:
                continue

            # Validate the panel: content inside, white outside
            panel_candidate = {
                'x': int(x1),
                'y': int(y1),
                'width': int(w),
                'height': int(h),
                'area': area,
                'center_x': float(x1 + w / 2),
                'center_y': float(y1 + h / 2)
            }

            # A narrower outer strip is passed to the validator here, since
            # the candidates were already built from detected lines
            if validate_panel(gray, panel_candidate, border_width=max(5, int(min(im_w, im_h) * 0.005))):
                panels.append(panel_candidate)

    # Draw the mask (black lines on white: background 255, panel outlines 0)
    panel_mask = np.ones_like(gray) * 255  # all-white mask
    for panel in panels:
        cv2.rectangle(panel_mask,
                     (panel['x'], panel['y']),
                     (panel['x'] + panel['width'],
                      panel['y'] + panel['height']),
                     0, 4)  # black outline, 4 px thick

    return panel_mask, panels


def detect_panels_from_text_mask(img, text_mask=None):
    """
    Detect panels through connected-component analysis of a text mask.

    Large connected regions of the (dilated) text mask are taken as hints for
    panels: each region's bounding box is expanded by 40% per side, validated
    with ``validate_panel`` and finally merged with overlapping candidates.

    Args:
        img: input image (BGR; a single-channel image is used as-is).
        text_mask: text mask (grayscale, text = 255, everything else = 0).
            When None, the call is delegated to ``detect_comic_panels(img)``.

    Returns:
        panel_mask: mask image with a white (255) background and black (0)
            panel outlines.
        panels: list of panel dicts with the keys x, y, width, height, area,
            center_x and center_y (plain Python ints/floats).
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()

    im_h, im_w = gray.shape
    img_area = im_h * im_w

    # Without a text mask fall back to the generic detector
    if text_mask is None:
        return detect_comic_panels(img)

    # Make sure the text mask has the same size as the image
    if text_mask.shape[:2] != (im_h, im_w):
        text_mask = cv2.resize(text_mask, (im_w, im_h))

    # Dilate and close the text mask so nearby text regions join up.
    # NOTE(review): connectedComponentsWithStats needs an 8-bit single-channel
    # image; this assumes text_mask already is one - confirm against callers.
    kernel_size = max(5, int(min(im_w, im_h) * 0.01))
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    dilated_mask = cv2.dilate(text_mask, kernel, iterations=3)
    dilated_mask = cv2.morphologyEx(dilated_mask, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Connected-component analysis to find the large text-bearing regions
    connectivity = 8
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
        dilated_mask, connectivity, cv2.CV_32S
    )

    print(f"[DEBUG] 文字遮罩图连通域分析：找到 {num_labels - 1} 个连通区域（排除背景）")

    panels = []
    min_panel_area = img_area * 0.02  # minimum region area (2% of the image)
    max_panel_area = img_area * 0.50  # maximum region area (50% of the image)
    validation_border = max(10, int(min(im_w, im_h) * 0.01))

    # Walk all components, skipping the background label 0
    for label_index in range(1, num_labels):
        # Convert the NumPy scalars to plain ints so the resulting panel
        # dicts hold plain ints, like the merged dicts below.
        x, y, w, h, area = (int(value) for value in stats[label_index])

        # Basic filtering: component pixel area too small or too large
        if area < min_panel_area or area > max_panel_area:
            continue

        # Reject extreme aspect ratios
        aspect_ratio = w / h if h > 0 else 0
        if aspect_ratio < 0.15 or aspect_ratio > 6.0:
            continue

        # Grow the bounding box by 40% of its size on every side to take in
        # the surrounding area, clipped to the image
        expand_x = int(w * 0.4)
        expand_y = int(h * 0.4)

        panel_x1 = max(0, x - expand_x)
        panel_y1 = max(0, y - expand_y)
        panel_x2 = min(im_w, x + w + expand_x)
        panel_y2 = min(im_h, y + h + expand_y)

        panel_w = panel_x2 - panel_x1
        panel_h = panel_y2 - panel_y1

        panel_candidate = {
            'x': panel_x1,
            'y': panel_y1,
            'width': panel_w,
            'height': panel_h,
            'area': panel_w * panel_h,
            'center_x': float(panel_x1 + panel_w / 2),
            'center_y': float(panel_y1 + panel_h / 2)
        }

        if validate_panel(gray, panel_candidate, border_width=validation_border):
            panels.append(panel_candidate)

    # Merge overlapping panels (single greedy pass)
    merged_panels = []
    for panel in panels:
        panel_right = panel['x'] + panel['width']
        panel_bottom = panel['y'] + panel['height']

        for i, existing in enumerate(merged_panels):
            existing_right = existing['x'] + existing['width']
            existing_bottom = existing['y'] + existing['height']

            overlap_x = max(0, min(panel_right, existing_right) - max(panel['x'], existing['x']))
            overlap_y = max(0, min(panel_bottom, existing_bottom) - max(panel['y'], existing['y']))
            overlap_area = overlap_x * overlap_y

            # Merge when the overlap exceeds 50% of the smaller panel
            if overlap_area > min(panel['area'], existing['area']) * 0.5:
                # Union bounding box of both panels
                new_x = min(panel['x'], existing['x'])
                new_y = min(panel['y'], existing['y'])
                new_w = max(panel_right, existing_right) - new_x
                new_h = max(panel_bottom, existing_bottom) - new_y

                merged_panels[i] = {
                    'x': int(new_x),
                    'y': int(new_y),
                    'width': int(new_w),
                    'height': int(new_h),
                    # 'area' must be carried along: the comparison above reads
                    # existing['area'], so a merged entry without it raised
                    # KeyError as soon as another panel overlapped it.
                    'area': int(new_w * new_h),
                    'center_x': float(new_x + new_w / 2),
                    'center_y': float(new_y + new_h / 2)
                }
                break
        else:
            merged_panels.append(panel)

    # Draw the mask (black lines on white: background 255, panel outlines 0)
    panel_mask = np.ones_like(gray) * 255
    for panel in merged_panels:
        cv2.rectangle(panel_mask,
                      (panel['x'], panel['y']),
                      (panel['x'] + panel['width'],
                       panel['y'] + panel['height']),
                      0, 4)  # black outline, 4 px thick

    return panel_mask, merged_panels


def detect_panels_from_text_blocks(img, text_blocks=None):
    """
    Infer panels from the positions of detected text blocks.

    Text blocks are greedily clustered around a seed block, the bounding box
    of each cluster is enlarged by 30% per side, and the result is kept when
    it passes the area limits and ``validate_panel``.

    Args:
        img: input image (BGR; a single-channel image is used as-is).
        text_blocks: optional list of text blocks. Each entry is either a
            dict carrying the box under 'xyxy' (or 'bbox'), or a sequence
            whose first four items are [x1, y1, x2, y2]. When None or empty,
            the call is delegated to ``detect_comic_panels(img)``.

    Returns:
        panel_mask: mask image with a white (255) background and black (0)
            panel outlines.
        panels: list of panel dicts with the keys x, y, width, height, area,
            center_x and center_y.
    """
    def read_box(entry):
        """Return the raw box stored in one text-block entry."""
        if isinstance(entry, dict):
            return entry.get('xyxy', entry.get('bbox', [0, 0, 0, 0]))
        return entry[:4] if len(entry) >= 4 else [0, 0, 0, 0]

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img.copy()
    page_h, page_w = gray.shape
    page_area = page_h * page_w

    # No text blocks at all: use the generic detector instead
    if text_blocks is None or len(text_blocks) == 0:
        return detect_comic_panels(img)

    area_floor = page_area * 0.02    # smallest accepted panel area
    area_ceiling = page_area * 0.50  # largest accepted panel area
    outer_strip = max(10, int(min(page_w, page_h) * 0.01))

    accepted = []
    claimed = set()  # indices of blocks already assigned to a cluster

    for seed_idx, seed in enumerate(text_blocks):
        if seed_idx in claimed:
            continue

        seed_box = read_box(seed)
        sx1, sy1, sx2, sy2 = seed_box
        if sx2 <= sx1 or sy2 <= sy1:
            continue  # degenerate box

        claimed.add(seed_idx)
        member_boxes = [seed_box]

        # Blocks whose centre lies within this radius of the seed's centre
        # join the seed's cluster
        reach = max((sx2 - sx1) * 2, (sy2 - sy1) * 2, 100)
        seed_cx = (sx1 + sx2) / 2
        seed_cy = (sy1 + sy2) / 2

        for other_idx, other in enumerate(text_blocks):
            if other_idx in claimed:
                continue

            other_box = read_box(other)
            ox1, oy1, ox2, oy2 = other_box
            if ox2 <= ox1 or oy2 <= oy1:
                continue

            other_cx = (ox1 + ox2) / 2
            other_cy = (oy1 + oy2) / 2
            gap = np.sqrt((seed_cx - other_cx)**2 + (seed_cy - other_cy)**2)
            if gap < reach:
                claimed.add(other_idx)
                member_boxes.append(other_box)

        # Bounding box of the whole cluster
        lo_x = min(box[0] for box in member_boxes)
        lo_y = min(box[1] for box in member_boxes)
        hi_x = max(box[2] for box in member_boxes)
        hi_y = max(box[3] for box in member_boxes)

        # Enlarge by 30% of the cluster size on each side, clipped to the image
        grow_x = int((hi_x - lo_x) * 0.3)
        grow_y = int((hi_y - lo_y) * 0.3)

        px1 = max(0, int(lo_x - grow_x))
        py1 = max(0, int(lo_y - grow_y))
        px2 = min(page_w, int(hi_x + grow_x))
        py2 = min(page_h, int(hi_y + grow_y))

        pw = px2 - px1
        ph = py2 - py1
        p_area = pw * ph

        if not (area_floor <= p_area <= area_ceiling):
            continue

        candidate = {
            'x': px1,
            'y': py1,
            'width': pw,
            'height': ph,
            'area': p_area,
            'center_x': float(px1 + pw / 2),
            'center_y': float(py1 + ph / 2)
        }

        if validate_panel(gray, candidate, border_width=outer_strip):
            accepted.append(candidate)

    # Render the outlines: white (255) canvas, black (0) rectangles
    panel_mask = np.ones_like(gray) * 255
    for entry in accepted:
        top_left = (entry['x'], entry['y'])
        bottom_right = (entry['x'] + entry['width'], entry['y'] + entry['height'])
        cv2.rectangle(panel_mask, top_left, bottom_right, 0, 4)

    return panel_mask, accepted


def detect_comic_panels(img, text_blocks=None, text_mask=None):
    """
    Detect comic panels (storyboard frames) with OpenCV.

    Strategies are tried in this order, and the first one that yields at
    least four panels wins:
    1. white-border line detection,
    2. connected components of the text mask (only if ``text_mask`` is given),
    3. clustering of text blocks (only if ``text_blocks`` is non-empty).
    If none qualifies, the traditional edge/contour pipeline below is used.
    A strategy that raises is logged with its traceback and skipped.

    Args:
        img: input image (BGR; a single-channel image is used as-is).
        text_blocks: optional list of text blocks (from comic-text-detector).
        text_mask: optional text mask (from comic-text-detector).

    Returns:
        panel_mask: grayscale mask, panel outlines black (0) on white (255).
        panels: list of panel dicts with bounding-box information. Panels
            from the traditional pipeline carry x, y, width, height,
            center_x and center_y; the other strategies also include 'area'.
    """
    import traceback

    # Strategy 1: white-border based detection
    try:
        print(f"[DEBUG] 尝试使用基于白色边界的方法...")
        panel_mask, panels = detect_panels_from_white_borders(img)
        print(f"[DEBUG] 基于白色边界的方法检测到 {len(panels)} 个格子")
        if len(panels) >= 4:  # enough panels found: use this result
            print(f"[DEBUG] 使用基于白色边界的方法，检测到 {len(panels)} 个格子")
            return panel_mask, panels
        else:
            print(f"[DEBUG] 基于白色边界的方法检测到的格子数量不足（{len(panels)}个），尝试其他方法")
    except Exception as e:
        print(f"[WARN] 基于白色边界的方法失败: {e}")
        traceback.print_exc()

    # Strategy 2: text-mask based detection
    if text_mask is not None:
        try:
            print(f"[DEBUG] 尝试使用基于文字遮罩图的连通域分析方法...")
            panel_mask, panels = detect_panels_from_text_mask(img, text_mask)
            print(f"[DEBUG] 基于文字遮罩图的方法检测到 {len(panels)} 个格子")
            if len(panels) >= 4:  # enough panels found: use this result
                print(f"[DEBUG] 使用基于文字遮罩图的方法，检测到 {len(panels)} 个格子")
                return panel_mask, panels
            else:
                print(f"[DEBUG] 基于文字遮罩图的方法检测到的格子数量不足（{len(panels)}个），尝试其他方法")
        except Exception as e:
            print(f"[WARN] 基于文字遮罩图的方法失败: {e}")
            traceback.print_exc()

    # Strategy 3: text-block based detection
    if text_blocks is not None and len(text_blocks) > 0:
        try:
            print(f"[DEBUG] 尝试使用基于文字块的方法，文字块数量: {len(text_blocks)}")
            panel_mask, panels = detect_panels_from_text_blocks(img, text_blocks)
            print(f"[DEBUG] 基于文字块的方法检测到 {len(panels)} 个格子")
            if len(panels) >= 4:  # enough panels found: use this result
                print(f"[DEBUG] 使用基于文字块的方法，检测到 {len(panels)} 个格子")
                return panel_mask, panels
            else:
                print(f"[DEBUG] 基于文字块的方法检测到的格子数量不足（{len(panels)}个），使用传统方法")
        except Exception as e:
            print(f"[WARN] 基于文字块的方法失败，使用传统方法: {e}")
            traceback.print_exc()

    # Traditional pipeline: edge detection plus contour analysis
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()

    im_h, im_w = gray.shape
    img_area = im_h * im_w

    # Step 1: permissive detection of every possible candidate.
    # Edge map = Canny edges OR inverted adaptive threshold.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    adaptive_thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )
    edges = cv2.Canny(blurred, 20, 80, apertureSize=3)
    combined_edges = cv2.bitwise_or(edges, adaptive_thresh)

    # Extract horizontal and vertical strokes with line-shaped kernels
    h_kernel_size = max(int(im_w * 0.03), 20)  # width of the horizontal kernel
    v_kernel_size = max(int(im_h * 0.03), 20)  # height of the vertical kernel

    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (h_kernel_size, 1))
    horizontal_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, horizontal_kernel)
    horizontal_lines = cv2.dilate(horizontal_lines, horizontal_kernel, iterations=3)

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_kernel_size))
    vertical_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, vertical_kernel)
    vertical_lines = cv2.dilate(vertical_lines, vertical_kernel, iterations=3)

    lines_mask = cv2.bitwise_or(horizontal_lines, vertical_lines)

    # Dilate and close the line mask to bridge broken strokes
    kernel_size = max(5, int(min(im_w, im_h) * 0.005))
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    dilated = cv2.dilate(lines_mask, kernel, iterations=5)
    dilated = cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, kernel, iterations=3)

    # An inverted Otsu segmentation of the blurred image is always OR-ed in,
    # so dark regions contribute even where the line mask is sparse
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    combined_mask = cv2.bitwise_or(dilated, thresh)

    # RETR_TREE returns every contour, nested ones included. Taking the last
    # two return values works on OpenCV 4.x (contours, hierarchy) and on
    # 3.x (image, contours, hierarchy) alike.
    contours, _hierarchy = cv2.findContours(
        combined_mask,
        cv2.RETR_TREE,
        cv2.CHAIN_APPROX_SIMPLE
    )[-2:]

    print(f"[DEBUG] 找到 {len(contours)} 个轮廓")

    # Step 2: collect candidates with very loose limits
    candidate_panels = []
    min_area = img_area * 0.005  # at least 0.5% of the image
    max_area = img_area * 0.95   # at most 95% of the image

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = w * h

        if area < min_area or area > max_area:
            continue

        aspect_ratio = w / h if h > 0 else 0
        if aspect_ratio < 0.1 or aspect_ratio > 10.0:
            continue

        candidate_panels.append({
            'x': int(x),
            'y': int(y),
            'width': int(w),
            'height': int(h),
            'area': area,
            'aspect_ratio': aspect_ratio,
            'center_x': float(x + w / 2),
            'center_y': float(y + h / 2)
        })

    # Largest first (more likely to be real panels); keep at most the 100
    # largest so validation does not process many small regions
    candidate_panels.sort(key=lambda p: p['area'], reverse=True)
    candidate_panels = candidate_panels[:100]

    print(f"[DEBUG] 检测到 {len(candidate_panels)} 个候选格子（已按面积排序）")

    # Step 3: validate every candidate (content inside, white outside)
    valid_panels = []
    border_width = max(10, int(min(im_w, im_h) * 0.01))

    for i, candidate in enumerate(candidate_panels):
        is_valid = validate_panel(gray, candidate, border_width=border_width)
        if is_valid:
            valid_panels.append(candidate)
        # Debug output for the first 10 candidates only
        if i < 10:
            area_ratio = (candidate['area'] / img_area) * 100
            print(f"[DEBUG] 候选格子 {i+1}: 面积={candidate['area']:.0f} ({area_ratio:.2f}%), "
                  f"尺寸={candidate['width']}x{candidate['height']}, "
                  f"位置=({candidate['x']}, {candidate['y']}), 验证={'通过' if is_valid else '失败'}")

    print(f"[DEBUG] 验证后保留 {len(valid_panels)} 个有效格子")

    # Step 4: merge overlapping panels (single greedy pass, largest first).
    # A candidate is merged into the first kept panel it overlaps by more
    # than 30% of the smaller of the two; otherwise it becomes a new panel.
    # The former second pass that skipped candidates overlapping a kept panel
    # by more than 50% could never trigger, because it ran only after every
    # kept panel had already failed the weaker 30% test on the same values,
    # so it has been removed.
    panels = []
    for candidate in valid_panels:
        cand_right = candidate['x'] + candidate['width']
        cand_bottom = candidate['y'] + candidate['height']

        for i, existing in enumerate(panels):
            existing_right = existing['x'] + existing['width']
            existing_bottom = existing['y'] + existing['height']

            overlap_x = max(0, min(cand_right, existing_right) - max(candidate['x'], existing['x']))
            overlap_y = max(0, min(cand_bottom, existing_bottom) - max(candidate['y'], existing['y']))
            overlap_area = overlap_x * overlap_y

            # Kept panels do not store 'area'; derive it from the box
            existing_area = existing['width'] * existing['height']

            if overlap_area > min(candidate['area'], existing_area) * 0.3:
                # Replace the kept panel with the union bounding box
                new_x = min(candidate['x'], existing['x'])
                new_y = min(candidate['y'], existing['y'])
                new_w = max(cand_right, existing_right) - new_x
                new_h = max(cand_bottom, existing_bottom) - new_y

                panels[i] = {
                    'x': int(new_x),
                    'y': int(new_y),
                    'width': int(new_w),
                    'height': int(new_h),
                    'center_x': float(new_x + new_w / 2),
                    'center_y': float(new_y + new_h / 2)
                }
                break
        else:
            panels.append({
                'x': candidate['x'],
                'y': candidate['y'],
                'width': candidate['width'],
                'height': candidate['height'],
                'center_x': candidate['center_x'],
                'center_y': candidate['center_y']
            })

    # Step 5: draw the mask (black lines on white: background 255, outlines 0)
    panel_mask = np.ones_like(gray) * 255
    for panel in panels:
        cv2.rectangle(panel_mask,
                      (panel['x'], panel['y']),
                      (panel['x'] + panel['width'],
                       panel['y'] + panel['height']),
                      0, 4)  # black outline, 4 px thick

    return panel_mask, panels


def merge_panel_mask_with_text_mask(panel_mask, text_mask):
    """
    Combine the panel-border mask and the text mask into a single mask.

    Args:
        panel_mask: Panel mask image (panel lines are black 0, everything
            else is white 255).
        text_mask: Text mask image (text regions are non-zero, usually
            white 255).

    Returns:
        combined_mask: Element-wise maximum of the inverted panel mask and
            the text mask, so panel lines and text are both bright on a
            dark background.
    """
    target_shape = text_mask.shape

    # Bring the panel mask to the text mask's size when the shapes differ.
    # cv2.resize takes the target size as (width, height).
    if panel_mask.shape != target_shape:
        target_size = (target_shape[1], target_shape[0])
        panel_mask = cv2.resize(panel_mask, target_size)

    # Invert the panel mask: black lines (0) become white (255) and the
    # white background (255) becomes black (0).
    inverted_panels = cv2.bitwise_not(panel_mask)

    # Taking the per-pixel maximum keeps both the (now white) panel lines
    # and the text regions.
    return np.maximum(inverted_panels, text_mask)


def _read_image_any_path(path, flags):
    """
    Read an image file, tolerating paths that contain non-ASCII characters.

    The file bytes are read through Python's own ``open`` and decoded with
    ``cv2.imdecode``; if that raises, ``cv2.imread`` is tried as a fallback.

    Args:
        path: Path (or string) of the image file.
        flags: OpenCV read flag, e.g. ``cv2.IMREAD_COLOR``.

    Returns:
        The decoded image array, or ``None`` when the file cannot be decoded.
    """
    try:
        with open(str(path), 'rb') as f:
            raw = np.frombuffer(f.read(), np.uint8)
        return cv2.imdecode(raw, flags)
    except Exception:
        # Deliberate best-effort: fall back to OpenCV's own file reader.
        return cv2.imread(str(path), flags)


def _write_png_any_path(path, image):
    """
    Encode ``image`` as PNG and write it to ``path`` via Python file I/O.

    Encoding in memory and writing the bytes ourselves avoids handing a
    possibly non-ASCII path to OpenCV.

    Args:
        path: Destination file path.
        image: Image array to encode.

    Returns:
        True when encoding succeeded and the file was written, else False.
    """
    ok, encoded = cv2.imencode('.png', image)
    if ok:
        with open(str(path), 'wb') as f:
            f.write(encoded.tobytes())
    return ok


if __name__ == '__main__':
    import argparse
    import numpy as np  # kept at script scope, as in the original entry point

    parser = argparse.ArgumentParser(description='检测漫画格子并生成遮罩图')
    parser.add_argument('image', help='输入图片路径')
    parser.add_argument('-o', '--output', help='输出目录')
    parser.add_argument('--text-mask', help='文字mask图片路径（可选，用于合并）')
    parser.add_argument('--text-blocks', help='文字块JSON文件路径（可选，用于辅助格子检测）')

    args = parser.parse_args()

    image_path = Path(args.image)
    if not image_path.exists():
        print(f"[ERROR] 图片文件不存在: {image_path}")
        sys.exit(1)

    # Read the input image (handles non-ASCII paths, e.g. Chinese on Windows).
    img = _read_image_any_path(image_path, cv2.IMREAD_COLOR)
    if img is None:
        print(f"[ERROR] 无法读取图片文件: {image_path}")
        print(f"[DEBUG] 尝试使用绝对路径: {image_path.resolve()}")
        sys.exit(1)

    # Resolve the directories once so that lookups and outputs agree.
    image_name = image_path.stem
    output_dir = Path(args.output) if args.output else image_path.parent
    # Compare the last path component exactly; a plain string suffix test
    # would also match directories such as "mytmp".
    output_is_tmp = output_dir.name == 'tmp'
    tmp_dir = output_dir if output_is_tmp else output_dir / 'tmp'

    # ---- Text blocks (explicit path, or auto-discovered next to the image) ----
    text_blocks = None
    if args.text_blocks:
        text_blocks_path = Path(args.text_blocks)
        if not text_blocks_path.exists():
            print(f"[WARN] 文字块JSON文件不存在: {text_blocks_path}")
            text_blocks_path = None
    else:
        # Candidate file names, tmp directory first, then the output
        # directory, then the image's own directory.
        possible_names = [
            tmp_dir / f"{image_name}_dialogues.json",
            output_dir / f"{image_name}_dialogues.json",
            output_dir / f"{image_name}_text_blocks.json",
            image_path.parent / f"{image_name}_dialogues.json",
            image_path.parent / f"{image_name}_text_blocks.json",
        ]
        text_blocks_path = next((p for p in possible_names if p.exists()), None)
        if text_blocks_path is not None:
            print(f"[INFO] 自动找到文字块文件: {text_blocks_path}")

    if text_blocks_path is not None:
        try:
            with open(text_blocks_path, 'r', encoding='utf-8') as f:
                text_blocks_data = json.load(f)

            # Accept either a bare list or a dict wrapping the list under
            # one of the known keys (first match wins).
            if isinstance(text_blocks_data, list):
                text_blocks = text_blocks_data
            elif isinstance(text_blocks_data, dict):
                for key in ('dialogues', 'text_blocks', 'blocks'):
                    if key in text_blocks_data:
                        text_blocks = text_blocks_data[key]
                        break

            # Normalise every block to {'xyxy': [x1, y1, x2, y2]}.
            if text_blocks:
                formatted_blocks = []
                for block in text_blocks:
                    if not isinstance(block, dict):
                        continue
                    if 'bbox' in block:
                        bbox = block['bbox']
                        formatted_blocks.append({
                            'xyxy': [bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2']]
                        })
                    elif 'xyxy' in block:
                        formatted_blocks.append({'xyxy': block['xyxy']})
                # Keep the raw list when nothing could be normalised.
                text_blocks = formatted_blocks if formatted_blocks else text_blocks
                print(f"[INFO] 从JSON文件读取到 {len(text_blocks)} 个文字块")
        except Exception as e:
            # Text blocks are optional; report and continue without them.
            print(f"[WARN] 无法读取文字块JSON文件: {e}")
            text_blocks = None

    # ---- Text mask (explicit path, or auto-discovered) ----
    text_mask = None
    text_mask_path = Path(args.text_mask) if args.text_mask else None
    if text_mask_path is not None:
        if text_mask_path.exists():
            # Same non-ASCII-safe reader as for the main image, so the mask
            # used for detection and the one used for merging are the same.
            text_mask = _read_image_any_path(text_mask_path, cv2.IMREAD_GRAYSCALE)
            if text_mask is not None:
                print(f"[INFO] 从文件读取文字遮罩图: {text_mask_path}")
    else:
        possible_names = [
            tmp_dir / f"{image_name}_text_mask.png",
            output_dir / f"{image_name}_text_mask.png",
            image_path.parent / f"{image_name}_text_mask.png",
        ]
        for possible_path in possible_names:
            if not possible_path.exists():
                continue
            text_mask = _read_image_any_path(possible_path, cv2.IMREAD_GRAYSCALE)
            if text_mask is not None:
                print(f"[INFO] 自动找到文字遮罩图: {possible_path}")
                break

    # detect_comic_panels is defined outside this block; keep an untouched
    # copy of an explicitly supplied mask for the merge step in case the
    # detector modifies its input in place.
    merge_text_mask = None
    if text_mask_path is not None and text_mask is not None:
        merge_text_mask = text_mask.copy()

    # ---- Panel detection ----
    print(f"[INFO] 正在检测漫画格子: {image_path.name}")
    panel_mask, panels = detect_comic_panels(img, text_blocks=text_blocks, text_mask=text_mask)
    print(f"[OK] 检测到 {len(panels)} 个格子")

    # ---- Output directories ----
    if args.output:
        output_dir.mkdir(parents=True, exist_ok=True)
    # Intermediate files go to a 'tmp' sub-directory unless the output
    # directory itself is already named 'tmp'.
    if not output_is_tmp:
        tmp_dir.mkdir(parents=True, exist_ok=True)

    # Save the panel mask (intermediate file).
    panel_mask_path = tmp_dir / f"{image_name}_panel_mask.png"
    if _write_png_any_path(panel_mask_path, panel_mask):
        print(f"[OK] 已保存格子遮罩图: {panel_mask_path}")
    else:
        print(f"[ERROR] 保存格子遮罩图失败: {panel_mask_path}")

    # Merge with the text mask only when one was given explicitly.
    if text_mask_path is not None:
        if not text_mask_path.exists():
            print(f"[WARN] 文字mask图不存在: {text_mask_path}")
        elif merge_text_mask is None:
            print(f"[WARN] 无法读取文字mask图: {text_mask_path}")
        else:
            combined_mask = merge_panel_mask_with_text_mask(panel_mask, merge_text_mask)
            combined_mask_path = tmp_dir / f"{image_name}_combined_mask.png"
            if _write_png_any_path(combined_mask_path, combined_mask):
                print(f"[OK] 已保存合并后的mask图: {combined_mask_path}")
            else:
                print(f"[ERROR] 保存合并后的mask图失败: {combined_mask_path}")

    # Save the panel metadata as JSON (intermediate file).
    panels_json = {
        'image_file': image_path.name,
        'panels': panels,
        'total_count': len(panels)
    }
    json_path = tmp_dir / f"{image_name}_panels.json"
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(panels_json, f, ensure_ascii=False, indent=2)
    print(f"[OK] 已保存格子信息: {json_path}")