# -*- coding: utf-8 -*-
"""Detect comic panels (frames) in a page image with OpenCV.

Three strategies are available and are tried in order by
``detect_comic_panels``: white-gutter line detection, connected components of
a text mask, clustering of text-block boxes, and finally a contour-based
fallback.  Every detector returns ``(panel_mask, panels)`` where
``panel_mask`` is a white image with black panel outlines and ``panels`` is a
list of dicts with the keys ``x``, ``y``, ``width``, ``height``, ``area``,
``center_x`` and ``center_y``.
"""
import json
import math
import sys
import traceback
from pathlib import Path

import cv2
import numpy as np

# Windows consoles often default to a legacy code page; re-wrap the standard
# streams so the Chinese log messages below do not raise UnicodeEncodeError.
if sys.platform == 'win32':
    import io
    if hasattr(sys.stdout, 'buffer'):
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
    if hasattr(sys.stderr, 'buffer'):
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')


# ---------------------------------------------------------------------------
# Small shared helpers
# ---------------------------------------------------------------------------

def _to_gray(img):
    """Return a single-channel copy of *img* (BGR input is converted)."""
    if len(img.shape) == 3:
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img.copy()


def _make_panel(x, y, width, height):
    """Build a panel dict from a box, using plain Python numbers only.

    Plain ``int``/``float`` values keep the result JSON-serialisable even when
    the inputs are NumPy scalars.
    """
    x, y, width, height = int(x), int(y), int(width), int(height)
    return {
        'x': x,
        'y': y,
        'width': width,
        'height': height,
        'area': width * height,
        'center_x': float(x + width / 2),
        'center_y': float(y + height / 2),
    }


def _overlap_area(a, b):
    """Return the intersection area of two panel dicts (0 if disjoint)."""
    overlap_x = max(0, min(a['x'] + a['width'], b['x'] + b['width']) - max(a['x'], b['x']))
    overlap_y = max(0, min(a['y'] + a['height'], b['y'] + b['height']) - max(a['y'], b['y']))
    return overlap_x * overlap_y


def _union_panel(a, b):
    """Return the bounding-box union of two panel dicts as a new panel."""
    x = min(a['x'], b['x'])
    y = min(a['y'], b['y'])
    right = max(a['x'] + a['width'], b['x'] + b['width'])
    bottom = max(a['y'] + a['height'], b['y'] + b['height'])
    return _make_panel(x, y, right - x, bottom - y)


def _merge_overlapping(panels, min_overlap_ratio):
    """Greedily merge panels that overlap strongly.

    A panel is merged into the first already-kept panel whose intersection
    exceeds ``min_overlap_ratio`` times the smaller of the two areas; the kept
    panel is replaced by the union box.  Otherwise the panel is kept as is.
    """
    merged = []
    for panel in panels:
        for index, existing in enumerate(merged):
            smaller_area = min(panel['area'], existing['area'])
            if _overlap_area(panel, existing) > smaller_area * min_overlap_ratio:
                merged[index] = _union_panel(panel, existing)
                break
        else:
            merged.append(_make_panel(panel['x'], panel['y'], panel['width'], panel['height']))
    return merged


def _draw_panel_mask(gray, panels):
    """Return a white mask shaped like *gray* with black 4px panel outlines."""
    panel_mask = np.full_like(gray, 255)
    for panel in panels:
        cv2.rectangle(
            panel_mask,
            (panel['x'], panel['y']),
            (panel['x'] + panel['width'], panel['y'] + panel['height']),
            0,
            4,
        )
    return panel_mask


def _edge_map(gray, canny_low, canny_high):
    """Return ``(blurred, edges)``: Canny edges OR-ed with an adaptive threshold."""
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    adaptive_thresh = cv2.adaptiveThreshold(
        blurred, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11, 2
    )
    edges = cv2.Canny(blurred, canny_low, canny_high, apertureSize=3)
    return blurred, cv2.bitwise_or(edges, adaptive_thresh)


def _directional_lines(edges, im_w, im_h, ratio, min_length, dilate_iterations):
    """Extract long horizontal and vertical strokes from an edge map.

    The structuring elements are ``ratio`` of the image width/height (at least
    ``min_length`` pixels); opening keeps only strokes that long, dilation
    then reconnects them.
    """
    h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(int(im_w * ratio), min_length), 1))
    horizontal = cv2.morphologyEx(edges, cv2.MORPH_OPEN, h_kernel)
    horizontal = cv2.dilate(horizontal, h_kernel, iterations=dilate_iterations)

    v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(int(im_h * ratio), min_length)))
    vertical = cv2.morphologyEx(edges, cv2.MORPH_OPEN, v_kernel)
    vertical = cv2.dilate(vertical, v_kernel, iterations=dilate_iterations)
    return horizontal, vertical


def _merge_lines(lines, is_horizontal, im_w, im_h):
    """Merge near-collinear Hough segments and drop short results.

    Segments whose mid coordinate (Y for horizontal, X for vertical) lies
    within 3% of the image size (at least 30px) of the group's first segment
    are grouped.  A group is kept only if its combined extent spans more than
    half of the image in the line direction.  Returns ``(x1, y1, x2, y2)``
    tuples.
    """
    if lines is None or len(lines) == 0:
        return []

    segments = [tuple(int(v) for v in line[0]) for line in lines]
    if is_horizontal:
        pos_a, pos_b, ext_a, ext_b = 1, 3, 0, 2
        threshold = max(im_h * 0.03, 30)
        min_span = im_w * 0.5
    else:
        pos_a, pos_b, ext_a, ext_b = 0, 2, 1, 3
        threshold = max(im_w * 0.03, 30)
        min_span = im_h * 0.5

    merged = []
    used = set()
    for i, seed in enumerate(segments):
        if i in used:
            continue
        used.add(i)
        seed_pos = (seed[pos_a] + seed[pos_b]) / 2
        group = [seed]
        for j in range(i + 1, len(segments)):
            if j in used:
                continue
            other = segments[j]
            if abs(seed_pos - (other[pos_a] + other[pos_b]) / 2) < threshold:
                group.append(other)
                used.add(j)

        position = int(np.mean([(s[pos_a] + s[pos_b]) / 2 for s in group]))
        low = min(min(s[ext_a], s[ext_b]) for s in group)
        high = max(max(s[ext_a], s[ext_b]) for s in group)
        if (high - low) > min_span:
            if is_horizontal:
                merged.append((low, position, high, position))
            else:
                merged.append((position, low, position, high))
    return merged


def _split_coords(merged_lines, coord_index, limit):
    """Return sorted unique cut positions including the borders 0 and *limit*."""
    coords = {0, limit}
    coords.update(line[coord_index] for line in merged_lines if 0 < line[coord_index] < limit)
    return sorted(coords)


def _filter_close(coords, min_gap):
    """Drop interior cuts closer than *min_gap* to the previously kept cut.

    The first and last entries (the image borders) are always kept.
    """
    filtered = [coords[0]]
    for value in coords[1:-1]:
        if value - filtered[-1] > min_gap:
            filtered.append(value)
    filtered.append(coords[-1])
    return filtered


def _block_bbox(block):
    """Return ``(x1, y1, x2, y2)`` for a text block, or zeros if unavailable.

    A block is either a dict with an ``xyxy`` (or ``bbox``) sequence or a
    plain sequence whose first four items are the coordinates.
    """
    if isinstance(block, dict):
        coords = block.get('xyxy', block.get('bbox', [0, 0, 0, 0]))
    else:
        coords = block
    if len(coords) >= 4:
        return tuple(coords[:4])
    return (0, 0, 0, 0)


# ---------------------------------------------------------------------------
# Panel validation
# ---------------------------------------------------------------------------

def validate_panel(gray, panel, border_width=10):
    """Check that a candidate box looks like a panel.

    A candidate passes when its interior is not blank white and the strips
    just outside its edges are mostly white.

    Args:
        gray: 2-D grayscale image.
        panel: dict with ``x``, ``y``, ``width`` and ``height``.
        border_width: thickness in pixels of the strips examined outside each
            edge.

    Returns:
        bool: True if the candidate is accepted.
    """
    im_h, im_w = gray.shape

    # Clamp the box to the image.
    x = max(0, min(im_w - 1, int(panel['x'])))
    y = max(0, min(im_h - 1, int(panel['y'])))
    w = min(int(panel['width']), im_w - x)
    h = min(int(panel['height']), im_h - y)
    if w <= 0 or h <= 0:
        return False

    # 1. Interior must contain artwork.  Shrink the box a little so the
    #    panel's own border line does not count as content.
    inner_margin = max(2, int(min(w, h) * 0.05))
    inner_x1, inner_y1 = x + inner_margin, y + inner_margin
    inner_x2, inner_y2 = x + w - inner_margin, y + h - inner_margin
    if inner_x2 <= inner_x1 or inner_y2 <= inner_y1:
        return False
    inner_region = gray[inner_y1:inner_y2, inner_x1:inner_x2]
    # Very bright and very uniform means blank paper.
    if np.mean(inner_region) > 250 and np.std(inner_region) < 5:
        return False

    # 2. Collect the strips outside each edge that fit inside the image.
    outer_regions = []
    if y >= border_width:
        outer_regions.append(gray[y - border_width:y, x:x + w])
    if y + h + border_width <= im_h:
        outer_regions.append(gray[y + h:y + h + border_width, x:x + w])
    if x >= border_width:
        outer_regions.append(gray[y:y + h, x - border_width:x])
    if x + w + border_width <= im_w:
        outer_regions.append(gray[y:y + h, x + w:x + w + border_width])
    outer_regions = [region for region in outer_regions if region.size > 0]

    panel_area = w * h
    image_area = im_w * im_h

    # Nothing to inspect (box touches every image edge): accept only boxes
    # covering more than 5% of the image.
    if not outer_regions:
        return panel_area > image_area * 0.05

    # A strip counts as white when it is bright (mean > 200) and fairly
    # uniform (std < 30).  Half of the checkable strips being white suffices.
    white_count = sum(
        1 for region in outer_regions
        if np.mean(region) > 200 and np.std(region) < 30
    )
    if white_count >= len(outer_regions) * 0.5:
        return True

    # Large boxes (> 2% of the image) are accepted even without white gutters.
    return panel_area > image_area * 0.02


# ---------------------------------------------------------------------------
# Strategy 1: white gutters / separator lines
# ---------------------------------------------------------------------------

def detect_panels_from_white_borders(img):
    """Detect panels from long horizontal and vertical separator lines.

    Long horizontal lines split the page into rows; within each row at most
    one vertical separator is kept, so a row yields at most two panels.  Each
    resulting cell is filtered by size and checked with ``validate_panel``.

    Args:
        img: input image (BGR or grayscale).

    Returns:
        tuple: ``(panel_mask, panels)``.
    """
    gray = _to_gray(img)
    im_h, im_w = gray.shape
    img_area = im_h * im_w

    # 1. Report whether each page margin is white (diagnostic output only).
    border_width = max(10, int(min(im_w, im_h) * 0.02))
    top_white = np.mean(gray[0:border_width, :]) > 240
    bottom_white = np.mean(gray[im_h-border_width:im_h, :]) > 240
    left_white = np.mean(gray[:, 0:border_width]) > 240
    right_white = np.mean(gray[:, im_w-border_width:im_w]) > 240
    print(f"[DEBUG] 边缘白色检测: 上={top_white}, 下={bottom_white}, 左={left_white}, 右={right_white}")

    # 2. Edge map and long axis-aligned strokes.
    _, combined_edges = _edge_map(gray, 30, 100)
    horizontal_lines, vertical_lines = _directional_lines(
        combined_edges, im_w, im_h, ratio=0.05, min_length=30, dilate_iterations=2
    )

    # 3. Probabilistic Hough transform with a high vote threshold so only the
    #    dominant separators are returned.
    short_side = min(im_w, im_h)
    hough_kwargs = dict(
        threshold=max(int(short_side * 0.25), 80),
        minLineLength=max(int(short_side * 0.15), 50),
        maxLineGap=max(int(short_side * 0.03), 10),
    )
    h_lines = cv2.HoughLinesP(horizontal_lines, 1, np.pi/180, **hough_kwargs)
    v_lines = cv2.HoughLinesP(vertical_lines, 1, np.pi/180, **hough_kwargs)

    # 4. Merge near-duplicate segments into separator lines.
    h_merged = _merge_lines(h_lines, True, im_w, im_h)
    v_merged = _merge_lines(v_lines, False, im_w, im_h)
    print(f"[DEBUG] 检测到 {len(h_merged)} 条水平线和 {len(v_merged)} 条垂直线")

    # 5. Cut positions, including the image borders.
    h_coords = _split_coords(h_merged, 1, im_h)
    v_coords = _split_coords(v_merged, 0, im_w)
    print(f"[DEBUG] 水平分割线Y坐标: {h_coords}")
    print(f"[DEBUG] 垂直分割线X坐标: {v_coords}")

    # 6. Drop cuts closer than 5% of the image size (likely duplicates).
    filtered_h_coords = _filter_close(h_coords, im_h * 0.05)
    filtered_v_coords = _filter_close(v_coords, im_w * 0.05)
    print(f"[DEBUG] 过滤后: {len(filtered_h_coords)} 条水平分割线, {len(filtered_v_coords)} 条垂直分割线")

    panels = []
    min_panel_area = img_area * 0.02
    min_height = im_h * 0.08
    min_width = im_w * 0.15
    validate_border = max(5, int(min(im_w, im_h) * 0.005))

    for y1, y2 in zip(filtered_h_coords, filtered_h_coords[1:]):
        # Keep a vertical cut for this row only if the edge map has enough
        # response in a 20px band around it (at least 20% of the row height).
        row_v_coords = [filtered_v_coords[0]]
        for v_x in filtered_v_coords[1:-1]:
            line_region = combined_edges[y1:y2, max(0, v_x-10):min(im_w, v_x+10)]
            if np.sum(line_region > 0) > (y2 - y1) * 0.2:
                row_v_coords.append(v_x)
        row_v_coords.append(filtered_v_coords[-1])

        # At most two panels per row: keep only the cut nearest the middle.
        if len(row_v_coords) > 3:
            left, right = row_v_coords[0], row_v_coords[-1]
            mid = (left + right) / 2
            closest_mid = min(row_v_coords[1:-1], key=lambda cut: abs(cut - mid))
            row_v_coords = [left, closest_mid, right]

        for x1, x2 in zip(row_v_coords, row_v_coords[1:]):
            w = x2 - x1
            h = y2 - y1
            if w * h < min_panel_area or h < min_height or w < min_width:
                continue
            panel_candidate = _make_panel(x1, y1, w, h)
            # A thin border suffices: the cell is already backed by lines.
            if validate_panel(gray, panel_candidate, border_width=validate_border):
                panels.append(panel_candidate)

    return _draw_panel_mask(gray, panels), panels


# ---------------------------------------------------------------------------
# Strategy 2: connected components of the text mask
# ---------------------------------------------------------------------------

def detect_panels_from_text_mask(img, text_mask=None):
    """Detect panels from connected regions of a text mask.

    The mask is dilated so nearby text joins up, each sufficiently large
    component's bounding box is grown by 40% per side, validated, and strongly
    overlapping results are merged.

    Args:
        img: input image (BGR or grayscale).
        text_mask: grayscale mask with text pixels non-zero; when None the
            call falls back to ``detect_comic_panels(img)``.

    Returns:
        tuple: ``(panel_mask, panels)``.
    """
    if text_mask is None:
        return detect_comic_panels(img)

    gray = _to_gray(img)
    im_h, im_w = gray.shape
    img_area = im_h * im_w

    if text_mask.shape[:2] != (im_h, im_w):
        text_mask = cv2.resize(text_mask, (im_w, im_h))

    # Join nearby text regions.
    kernel_size = max(5, int(min(im_w, im_h) * 0.01))
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    dilated_mask = cv2.dilate(text_mask, kernel, iterations=3)
    dilated_mask = cv2.morphologyEx(dilated_mask, cv2.MORPH_CLOSE, kernel, iterations=2)

    num_labels, _labels, stats, _centroids = cv2.connectedComponentsWithStats(
        dilated_mask, 8, cv2.CV_32S
    )
    print(f"[DEBUG] 文字遮罩图连通域分析:找到 {num_labels - 1} 个连通区域(排除背景)")

    panels = []
    min_panel_area = img_area * 0.02
    max_panel_area = img_area * 0.50
    validate_border = max(10, int(min(im_w, im_h) * 0.01))

    # Label 0 is the background.
    for label_index in range(1, num_labels):
        # Convert NumPy scalars so the panel dicts stay JSON-serialisable.
        x, y, w, h, area = (int(value) for value in stats[label_index])

        if area < min_panel_area or area > max_panel_area:
            continue
        aspect_ratio = w / h if h > 0 else 0
        if aspect_ratio < 0.15 or aspect_ratio > 6.0:
            continue

        # Grow the box by 40% of its size on every side, clamped to the image.
        expand_x = int(w * 0.4)
        expand_y = int(h * 0.4)
        panel_x1 = max(0, x - expand_x)
        panel_y1 = max(0, y - expand_y)
        panel_x2 = min(im_w, x + w + expand_x)
        panel_y2 = min(im_h, y + h + expand_y)

        panel_candidate = _make_panel(panel_x1, panel_y1, panel_x2 - panel_x1, panel_y2 - panel_y1)
        if validate_panel(gray, panel_candidate, border_width=validate_border):
            panels.append(panel_candidate)

    # Merge boxes overlapping by more than 50% of the smaller one.
    merged_panels = _merge_overlapping(panels, 0.5)
    return _draw_panel_mask(gray, merged_panels), merged_panels


# ---------------------------------------------------------------------------
# Strategy 3: clustering of text-block boxes
# ---------------------------------------------------------------------------

def detect_panels_from_text_blocks(img, text_blocks=None):
    """Detect panels by clustering text-block boxes.

    Blocks whose centres lie within a search radius of a seed block form a
    cluster; the cluster's bounding box is grown by 30% per side, filtered by
    area and validated.

    Args:
        img: input image (BGR or grayscale).
        text_blocks: list of blocks, each a dict with ``xyxy``/``bbox`` or a
            sequence ``[x1, y1, x2, y2, ...]``; when empty or None the call
            falls back to ``detect_comic_panels(img)``.

    Returns:
        tuple: ``(panel_mask, panels)``.
    """
    if text_blocks is None or len(text_blocks) == 0:
        return detect_comic_panels(img)

    gray = _to_gray(img)
    im_h, im_w = gray.shape
    img_area = im_h * im_w

    min_panel_area = img_area * 0.02
    max_panel_area = img_area * 0.50
    validate_border = max(10, int(min(im_w, im_h) * 0.01))

    boxes = [_block_bbox(block) for block in text_blocks]
    panels = []
    used_blocks = set()

    for i, (x1, y1, x2, y2) in enumerate(boxes):
        if i in used_blocks:
            continue
        if x2 <= x1 or y2 <= y1:
            continue  # degenerate box

        used_blocks.add(i)
        cluster_bboxes = [boxes[i]]

        # Radius: twice the seed's larger side, at least 100px.
        search_margin = max((x2 - x1) * 2, (y2 - y1) * 2, 100)
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2

        for j, (ox1, oy1, ox2, oy2) in enumerate(boxes):
            if j in used_blocks:
                continue
            if ox2 <= ox1 or oy2 <= oy1:
                continue
            distance = math.hypot(center_x - (ox1 + ox2) / 2, center_y - (oy1 + oy2) / 2)
            if distance < search_margin:
                cluster_bboxes.append(boxes[j])
                used_blocks.add(j)

        cluster_x1 = min(bbox[0] for bbox in cluster_bboxes)
        cluster_y1 = min(bbox[1] for bbox in cluster_bboxes)
        cluster_x2 = max(bbox[2] for bbox in cluster_bboxes)
        cluster_y2 = max(bbox[3] for bbox in cluster_bboxes)

        # Grow by 30% per side to take in the surrounding artwork.
        expand_x = int((cluster_x2 - cluster_x1) * 0.3)
        expand_y = int((cluster_y2 - cluster_y1) * 0.3)
        panel_x1 = max(0, int(cluster_x1 - expand_x))
        panel_y1 = max(0, int(cluster_y1 - expand_y))
        panel_x2 = min(im_w, int(cluster_x2 + expand_x))
        panel_y2 = min(im_h, int(cluster_y2 + expand_y))

        panel_candidate = _make_panel(panel_x1, panel_y1, panel_x2 - panel_x1, panel_y2 - panel_y1)
        if panel_candidate['area'] < min_panel_area or panel_candidate['area'] > max_panel_area:
            continue
        if validate_panel(gray, panel_candidate, border_width=validate_border):
            panels.append(panel_candidate)

    return _draw_panel_mask(gray, panels), panels


# ---------------------------------------------------------------------------
# Entry point combining all strategies
# ---------------------------------------------------------------------------

def _detect_panels_from_contours(img):
    """Contour-based fallback: loose candidates, validation, overlap merging."""
    gray = _to_gray(img)
    im_h, im_w = gray.shape
    img_area = im_h * im_w

    # Sensitive edge map plus long axis-aligned strokes.
    blurred, combined_edges = _edge_map(gray, 20, 80)
    horizontal_lines, vertical_lines = _directional_lines(
        combined_edges, im_w, im_h, ratio=0.03, min_length=20, dilate_iterations=3
    )
    lines_mask = cv2.bitwise_or(horizontal_lines, vertical_lines)

    # Aggressive dilation/closing to reconnect broken borders.
    kernel_size = max(5, int(min(im_w, im_h) * 0.005))
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    dilated = cv2.dilate(lines_mask, kernel, iterations=5)
    dilated = cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, kernel, iterations=3)

    # Add an Otsu segmentation so dark panel areas count as well.
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    combined_mask = cv2.bitwise_or(dilated, thresh)

    # OpenCV 3 returns (image, contours, hierarchy), OpenCV 4 returns
    # (contours, hierarchy); index from the end to support both.
    contours = cv2.findContours(combined_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    print(f"[DEBUG] 找到 {len(contours)} 个轮廓")

    # Loose candidate collection: 0.5%-95% of the page, aspect ratio 0.1-10.
    candidate_panels = []
    min_area = img_area * 0.005
    max_area = img_area * 0.95
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = w * h
        if area < min_area or area > max_area:
            continue
        aspect_ratio = w / h if h > 0 else 0
        if aspect_ratio < 0.1 or aspect_ratio > 10.0:
            continue
        candidate_panels.append(_make_panel(x, y, w, h))

    # Largest first, capped at 100 candidates to bound validation work.
    candidate_panels.sort(key=lambda p: p['area'], reverse=True)
    candidate_panels = candidate_panels[:100]
    print(f"[DEBUG] 检测到 {len(candidate_panels)} 个候选格子(已按面积排序)")

    valid_panels = []
    border_width = max(10, int(min(im_w, im_h) * 0.01))
    for i, candidate in enumerate(candidate_panels):
        is_valid = validate_panel(gray, candidate, border_width=border_width)
        if is_valid:
            valid_panels.append(candidate)
        # Only the ten largest candidates are logged.
        if i < 10:
            area_ratio = (candidate['area'] / img_area) * 100
            print(f"[DEBUG] 候选格子 {i+1}: 面积={candidate['area']:.0f} ({area_ratio:.2f}%), "
                  f"尺寸={candidate['width']}x{candidate['height']}, "
                  f"位置=({candidate['x']}, {candidate['y']}), 验证={'通过' if is_valid else '失败'}")
    print(f"[DEBUG] 验证后保留 {len(valid_panels)} 个有效格子")

    # Merge boxes overlapping by more than 30% of the smaller one.
    panels = _merge_overlapping(valid_panels, 0.3)
    return _draw_panel_mask(gray, panels), panels


def detect_comic_panels(img, text_blocks=None, text_mask=None):
    """Detect comic panels, trying several strategies in order.

    Order: white-border lines, text mask (if given), text blocks (if given),
    then the contour-based fallback.  One of the first three is used only if
    it yields at least four panels; a strategy that raises is logged and
    skipped.

    Args:
        img: input image (BGR or grayscale).
        text_blocks: optional list of text blocks (see
            ``detect_panels_from_text_blocks``).
        text_mask: optional grayscale text mask.

    Returns:
        tuple: ``(panel_mask, panels)`` where ``panel_mask`` has black panel
        outlines on white and ``panels`` is a list of panel dicts.
    """
    # Strategy 1: white borders.
    try:
        print(f"[DEBUG] 尝试使用基于白色边界的方法...")
        panel_mask, panels = detect_panels_from_white_borders(img)
        print(f"[DEBUG] 基于白色边界的方法检测到 {len(panels)} 个格子")
        if len(panels) >= 4:
            print(f"[DEBUG] 使用基于白色边界的方法,检测到 {len(panels)} 个格子")
            return panel_mask, panels
        print(f"[DEBUG] 基于白色边界的方法检测到的格子数量不足({len(panels)}个),尝试其他方法")
    except Exception as e:  # best effort: fall through to the next strategy
        print(f"[WARN] 基于白色边界的方法失败: {e}")
        traceback.print_exc()

    # Strategy 2: text mask.
    if text_mask is not None:
        try:
            print(f"[DEBUG] 尝试使用基于文字遮罩图的连通域分析方法...")
            panel_mask, panels = detect_panels_from_text_mask(img, text_mask)
            print(f"[DEBUG] 基于文字遮罩图的方法检测到 {len(panels)} 个格子")
            if len(panels) >= 4:
                print(f"[DEBUG] 使用基于文字遮罩图的方法,检测到 {len(panels)} 个格子")
                return panel_mask, panels
            print(f"[DEBUG] 基于文字遮罩图的方法检测到的格子数量不足({len(panels)}个),尝试其他方法")
        except Exception as e:  # best effort: fall through to the next strategy
            print(f"[WARN] 基于文字遮罩图的方法失败: {e}")
            traceback.print_exc()

    # Strategy 3: text blocks.
    if text_blocks is not None and len(text_blocks) > 0:
        try:
            print(f"[DEBUG] 尝试使用基于文字块的方法,文字块数量: {len(text_blocks)}")
            panel_mask, panels = detect_panels_from_text_blocks(img, text_blocks)
            print(f"[DEBUG] 基于文字块的方法检测到 {len(panels)} 个格子")
            if len(panels) >= 4:
                print(f"[DEBUG] 使用基于文字块的方法,检测到 {len(panels)} 个格子")
                return panel_mask, panels
            print(f"[DEBUG] 基于文字块的方法检测到的格子数量不足({len(panels)}个),使用传统方法")
        except Exception as e:  # best effort: fall through to the fallback
            print(f"[WARN] 基于文字块的方法失败,使用传统方法: {e}")
            traceback.print_exc()

    # Fallback: edges and contours.
    return _detect_panels_from_contours(img)


def merge_panel_mask_with_text_mask(panel_mask, text_mask):
    """Combine a panel mask and a text mask into one white-on-black mask.

    Args:
        panel_mask: panel mask with black (0) lines on white (255).
        text_mask: text mask with non-zero text pixels.

    Returns:
        Mask in which both panel lines and text are bright on a dark
        background (element-wise maximum of the inverted panel mask and the
        text mask).
    """
    # Bring the panel mask to the text mask's size if they differ.
    if panel_mask.shape != text_mask.shape:
        panel_mask = cv2.resize(panel_mask, (text_mask.shape[1], text_mask.shape[0]))

    # Invert so panel lines become white, then keep the brighter pixel.
    panel_mask_inv = cv2.bitwise_not(panel_mask)
    return np.maximum(panel_mask_inv, text_mask)


# ---------------------------------------------------------------------------
# Command-line interface
# ---------------------------------------------------------------------------

def _imread_unicode(path, flags):
    """Read an image from a path that may contain non-ASCII characters.

    ``cv2.imread`` can fail on such paths on Windows, so the bytes are read
    through NumPy and decoded in memory; ``cv2.imread`` is the fallback.
    Returns None when the image cannot be read.
    """
    try:
        data = np.fromfile(str(path), dtype=np.uint8)
        return cv2.imdecode(data, flags)
    except (OSError, cv2.error):
        return cv2.imread(str(path), flags)


def _write_png(path, image):
    """Encode *image* as PNG and write it to *path*; return True on success."""
    success, encoded_img = cv2.imencode('.png', image)
    if not success:
        return False
    with open(str(path), 'wb') as f:
        f.write(encoded_img.tobytes())
    return True


def _load_text_blocks(text_blocks_path):
    """Load text blocks from a JSON file, normalised to ``{'xyxy': [...]}``.

    Accepts a top-level list, or a dict with a ``dialogues``, ``text_blocks``
    or ``blocks`` list.  Returns None if the file cannot be read or parsed.
    """
    try:
        with open(text_blocks_path, 'r', encoding='utf-8') as f:
            text_blocks_data = json.load(f)

        text_blocks = None
        if isinstance(text_blocks_data, list):
            text_blocks = text_blocks_data
        elif isinstance(text_blocks_data, dict):
            for key in ('dialogues', 'text_blocks', 'blocks'):
                if key in text_blocks_data:
                    text_blocks = text_blocks_data[key]
                    break

        if text_blocks:
            formatted_blocks = []
            for block in text_blocks:
                if not isinstance(block, dict):
                    continue
                if 'bbox' in block:
                    bbox = block['bbox']
                    if isinstance(bbox, dict):
                        xyxy = [bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2']]
                    else:
                        xyxy = list(bbox[:4])
                    formatted_blocks.append({'xyxy': xyxy})
                elif 'xyxy' in block:
                    formatted_blocks.append({'xyxy': block['xyxy']})
            # Keep the raw list when nothing could be normalised.
            text_blocks = formatted_blocks if formatted_blocks else text_blocks
            print(f"[INFO] 从JSON文件读取到 {len(text_blocks)} 个文字块")
        return text_blocks
    except (OSError, ValueError, KeyError, TypeError, IndexError) as e:
        print(f"[WARN] 无法读取文字块JSON文件: {e}")
        return None


def main():
    """Command-line entry point: detect panels and write mask and JSON files."""
    import argparse

    parser = argparse.ArgumentParser(description='检测漫画格子并生成遮罩图')
    parser.add_argument('image', help='输入图片路径')
    parser.add_argument('-o', '--output', help='输出目录')
    parser.add_argument('--text-mask', help='文字mask图片路径(可选,用于合并)')
    parser.add_argument('--text-blocks', help='文字块JSON文件路径(可选,用于辅助格子检测)')
    args = parser.parse_args()

    image_path = Path(args.image)
    if not image_path.exists():
        print(f"[ERROR] 图片文件不存在: {image_path}")
        sys.exit(1)

    img = _imread_unicode(image_path, cv2.IMREAD_COLOR)
    if img is None:
        print(f"[ERROR] 无法读取图片文件: {image_path}")
        print(f"[DEBUG] 尝试使用绝对路径: {image_path.resolve()}")
        sys.exit(1)

    image_name = image_path.stem
    output_dir = Path(args.output) if args.output else image_path.parent
    search_tmp_dir = output_dir / 'tmp'

    # --- Text blocks: explicit path, otherwise look in the usual places. ---
    if args.text_blocks:
        text_blocks_path = Path(args.text_blocks)
    else:
        text_blocks_path = None
        possible_names = [
            search_tmp_dir / f"{image_name}_dialogues.json",
            output_dir / f"{image_name}_dialogues.json",
            output_dir / f"{image_name}_text_blocks.json",
            image_path.parent / f"{image_name}_dialogues.json",
            image_path.parent / f"{image_name}_text_blocks.json",
        ]
        for possible_path in possible_names:
            if possible_path.exists():
                text_blocks_path = possible_path
                print(f"[INFO] 自动找到文字块文件: {text_blocks_path}")
                break

    text_blocks = None
    if text_blocks_path and text_blocks_path.exists():
        text_blocks = _load_text_blocks(text_blocks_path)

    # --- Text mask: explicit path, otherwise look in the usual places. ---
    text_mask = None
    text_mask_path = Path(args.text_mask) if args.text_mask else None
    if text_mask_path is not None:
        if text_mask_path.exists():
            text_mask = _imread_unicode(text_mask_path, cv2.IMREAD_GRAYSCALE)
            if text_mask is not None:
                print(f"[INFO] 从文件读取文字遮罩图: {text_mask_path}")
    else:
        possible_names = [
            search_tmp_dir / f"{image_name}_text_mask.png",
            output_dir / f"{image_name}_text_mask.png",
            image_path.parent / f"{image_name}_text_mask.png",
        ]
        for possible_path in possible_names:
            if not possible_path.exists():
                continue
            text_mask = _imread_unicode(possible_path, cv2.IMREAD_GRAYSCALE)
            if text_mask is not None:
                print(f"[INFO] 自动找到文字遮罩图: {possible_path}")
                break

    # --- Detection. ---
    print(f"[INFO] 正在检测漫画格子: {image_path.name}")
    panel_mask, panels = detect_comic_panels(img, text_blocks=text_blocks, text_mask=text_mask)
    print(f"[OK] 检测到 {len(panels)} 个格子")

    # --- Output locations. ---
    if args.output:
        output_dir.mkdir(parents=True, exist_ok=True)
    # Intermediate files go to a 'tmp' directory; reuse output_dir when it is
    # itself named 'tmp' (compare the last path component, not a suffix).
    tmp_dir = output_dir if output_dir.name == 'tmp' else output_dir / 'tmp'
    tmp_dir.mkdir(parents=True, exist_ok=True)

    panel_mask_path = tmp_dir / f"{image_name}_panel_mask.png"
    if _write_png(panel_mask_path, panel_mask):
        print(f"[OK] 已保存格子遮罩图: {panel_mask_path}")
    else:
        print(f"[ERROR] 保存格子遮罩图失败: {panel_mask_path}")

    # --- Combined mask, only when a text mask was given explicitly. ---
    if text_mask_path is not None:
        if not text_mask_path.exists():
            print(f"[WARN] 文字mask图不存在: {text_mask_path}")
        elif text_mask is None:
            print(f"[WARN] 无法读取文字mask图: {text_mask_path}")
        else:
            combined_mask = merge_panel_mask_with_text_mask(panel_mask, text_mask)
            combined_mask_path = tmp_dir / f"{image_name}_combined_mask.png"
            if _write_png(combined_mask_path, combined_mask):
                print(f"[OK] 已保存合并后的mask图: {combined_mask_path}")
            else:
                print(f"[ERROR] 保存合并后的mask图失败: {combined_mask_path}")

    # --- Panel metadata. ---
    panels_json = {
        'image_file': image_path.name,
        'panels': panels,
        'total_count': len(panels)
    }
    json_path = tmp_dir / f"{image_name}_panels.json"
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(panels_json, f, ensure_ascii=False, indent=2)
    print(f"[OK] 已保存格子信息: {json_path}")


if __name__ == '__main__':
    main()