# -*- coding: utf-8 -*-
"""
Detect comic panels (frame borders) with OpenCV.
"""
- import sys
- import json
- from pathlib import Path
- import cv2
- import numpy as np
import io


def ensure_utf8_stream(stream):
    """Return *stream* switched to UTF-8 output with ``errors='replace'``.

    Args:
        stream: A text stream such as ``sys.stdout``; may be ``None``
            (e.g. when no console is attached).

    Returns:
        The same stream object, reconfigured in place, when it offers
        ``reconfigure()``; a new ``io.TextIOWrapper`` around
        ``stream.buffer`` when it only exposes a binary buffer; otherwise
        the stream unchanged.
    """
    if stream is None:
        return stream

    # Preferred path: reconfigure in place, which keeps the stream's
    # existing buffering settings instead of creating a second wrapper.
    reconfigure = getattr(stream, 'reconfigure', None)
    if callable(reconfigure):
        reconfigure(encoding='utf-8', errors='replace')
        return stream

    # Fallback for streams without reconfigure(): wrap the binary buffer.
    raw = getattr(stream, 'buffer', None)
    if raw is None:
        # Replaced/captured stream with no binary layer: leave it alone
        # rather than crashing at import time.
        return stream
    return io.TextIOWrapper(raw, encoding='utf-8', errors='replace')


# Windows console encoding fix: force UTF-8 so the non-ASCII log messages
# printed by this module do not raise UnicodeEncodeError.
if sys.platform == 'win32':
    sys.stdout = ensure_utf8_stream(sys.stdout)
    sys.stderr = ensure_utf8_stream(sys.stderr)
def validate_panel(gray, panel, border_width=10):
    """Check whether a candidate rectangle looks like a comic panel.

    A panel is accepted when its interior contains drawing content and the
    strips just outside its borders are (mostly) blank white gutter. Large
    rectangles are accepted even when the gutter test fails.

    Args:
        gray: Image array; the first two axes are read as (height, width).
            Callers in this file pass a single-channel grayscale image.
        panel: Dict with integer-like ``x``, ``y``, ``width``, ``height``.
        border_width: Thickness in pixels of the outside strips to inspect.

    Returns:
        bool: True if the rectangle passes validation, False otherwise.
    """
    im_h, im_w = gray.shape[:2]

    # Clip the rectangle to the image by clamping BOTH corners. Clamping
    # only the origin (and leaving width/height untouched) would let a
    # rectangle with a negative origin cover pixels beyond its true
    # right/bottom edge.
    raw_x = int(panel['x'])
    raw_y = int(panel['y'])
    left = max(0, raw_x)
    top = max(0, raw_y)
    right = min(im_w, raw_x + int(panel['width']))
    bottom = min(im_h, raw_y + int(panel['height']))

    w = right - left
    h = bottom - top
    if w <= 0 or h <= 0:
        return False

    # 1. The interior must contain content. Shrink slightly so the frame
    #    line itself does not count as content.
    inner_margin = max(2, int(min(w, h) * 0.05))
    inner_x1 = left + inner_margin
    inner_y1 = top + inner_margin
    inner_x2 = right - inner_margin
    inner_y2 = bottom - inner_margin
    if inner_x2 <= inner_x1 or inner_y2 <= inner_y1:
        return False

    inner_region = gray[inner_y1:inner_y2, inner_x1:inner_x2]
    # Very bright and nearly uniform means a blank white area.
    if np.mean(inner_region) > 250 and np.std(inner_region) < 5:
        return False

    # 2. Collect the strips just outside each side. A side is only checked
    #    when a full border_width strip fits inside the image.
    outer_regions = []
    if top >= border_width:
        outer_regions.append(gray[top - border_width:top, left:right])
    if bottom + border_width <= im_h:
        outer_regions.append(gray[bottom:bottom + border_width, left:right])
    if left >= border_width:
        outer_regions.append(gray[top:bottom, left - border_width:left])
    if right + border_width <= im_w:
        outer_regions.append(gray[top:bottom, right:right + border_width])
    outer_regions = [region for region in outer_regions if region.size > 0]

    panel_area = w * h
    image_area = im_w * im_h

    # Nothing to inspect (rectangle hugs every image edge): accept only if
    # it covers more than 5% of the image.
    if not outer_regions:
        return bool(panel_area > image_area * 0.05)

    # A strip counts as white gutter when it is bright and fairly uniform.
    white_count = sum(
        1 for region in outer_regions
        if np.mean(region) > 200 and np.std(region) < 30
    )

    # At least half of the inspected sides must be white gutter.
    if white_count >= len(outer_regions) * 0.5:
        return True

    # Otherwise still accept rectangles covering more than 2% of the image.
    return bool(panel_area > image_area * 0.02)
def detect_panels_from_white_borders(img):
    """Detect panels by building a grid from long horizontal/vertical lines.

    Steps:
      1. Report (debug only) whether the four page margins look white.
      2. Extract long horizontal and vertical strokes from an edge map.
      3. Find line segments with a probabilistic Hough transform and merge
         segments lying at nearly the same position.
      4. Use the merged lines plus the image borders as grid coordinates;
         each row is split into at most two cells.
      5. Keep the cells that pass size filters and ``validate_panel``.

    Args:
        img: Input image (BGR, or already single-channel).

    Returns:
        (panel_mask, panels): a white mask with panel rectangles drawn in
        black, and the list of panel dicts.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img.copy()

    im_h, im_w = gray.shape
    img_area = im_h * im_w
    short_side = min(im_w, im_h)

    # 1. Brightness of the four page margins (only used for the debug line).
    margin = max(10, int(short_side * 0.02))
    top_white = np.mean(gray[0:margin, :]) > 240
    bottom_white = np.mean(gray[im_h - margin:im_h, :]) > 240
    left_white = np.mean(gray[:, 0:margin]) > 240
    right_white = np.mean(gray[:, im_w - margin:im_w]) > 240
    print(f"[DEBUG] 边缘白色检测: 上={top_white}, 下={bottom_white}, 左={left_white}, 右={right_white}")

    # 2. Edge map: adaptive threshold OR Canny on the blurred image.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    adaptive_thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )
    edges = cv2.Canny(blurred, 30, 100, apertureSize=3)
    combined_edges = cv2.bitwise_or(edges, adaptive_thresh)

    def extract_strokes(kernel_shape):
        """Keep strokes at least as long as the kernel, then thicken them."""
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_shape)
        opened = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, kernel)
        return cv2.dilate(opened, kernel, iterations=2)

    horizontal_lines = extract_strokes((max(int(im_w * 0.05), 30), 1))
    vertical_lines = extract_strokes((1, max(int(im_h * 0.05), 30)))

    # 3. Hough segments; both orientations share the same parameters.
    hough_params = {
        'threshold': max(int(short_side * 0.25), 80),
        'minLineLength': max(int(short_side * 0.15), 50),
        'maxLineGap': max(int(short_side * 0.03), 10),
    }
    h_lines = cv2.HoughLinesP(horizontal_lines, 1, np.pi / 180, **hough_params)
    v_lines = cv2.HoughLinesP(vertical_lines, 1, np.pi / 180, **hough_params)

    # 4. Merge segments that sit at nearly the same position.
    def merge_lines(lines, is_horizontal=True):
        """Group nearby segments and drop groups spanning under half the page."""
        if lines is None or len(lines) == 0:
            return []

        if is_horizontal:
            # Position is measured along y; extent along x.
            pos_a, pos_b, ext_a, ext_b = 1, 3, 0, 2
            tolerance = max(im_h * 0.03, 30)
            required_extent = im_w * 0.5
        else:
            # Position is measured along x; extent along y.
            pos_a, pos_b, ext_a, ext_b = 0, 2, 1, 3
            tolerance = max(im_w * 0.03, 30)
            required_extent = im_h * 0.5

        def position(segment):
            coords = segment[0]
            return (coords[pos_a] + coords[pos_b]) / 2

        result = []
        consumed = set()
        for seed_idx, seed in enumerate(lines):
            if seed_idx in consumed:
                continue
            consumed.add(seed_idx)

            # Later segments are compared against the seed's position,
            # not against a running average.
            anchor = position(seed)
            group = [seed]
            for other_idx in range(seed_idx + 1, len(lines)):
                if other_idx in consumed:
                    continue
                candidate = lines[other_idx]
                if abs(anchor - position(candidate)) < tolerance:
                    group.append(candidate)
                    consumed.add(other_idx)

            mean_pos = int(np.mean([position(segment) for segment in group]))
            low = int(min(min(segment[0][ext_a], segment[0][ext_b]) for segment in group))
            high = int(max(max(segment[0][ext_a], segment[0][ext_b]) for segment in group))

            if (high - low) > required_extent:
                if is_horizontal:
                    result.append((low, mean_pos, high, mean_pos))
                else:
                    result.append((mean_pos, low, mean_pos, high))

        return result

    h_merged = merge_lines(h_lines, is_horizontal=True)
    v_merged = merge_lines(v_lines, is_horizontal=False)

    print(f"[DEBUG] 检测到 {len(h_merged)} 条水平线和 {len(v_merged)} 条垂直线")

    # 5. Grid coordinates: image borders plus interior line positions.
    h_coords = sorted({0, im_h} | {line[1] for line in h_merged if 0 < line[1] < im_h})
    v_coords = sorted({0, im_w} | {line[0] for line in v_merged if 0 < line[0] < im_w})

    print(f"[DEBUG] 水平分割线Y坐标: {h_coords}")
    print(f"[DEBUG] 垂直分割线X坐标: {v_coords}")

    def drop_near_duplicates(coords, min_gap):
        """Keep first/last; keep interior values farther than min_gap from the last kept."""
        kept = [coords[0]]
        for value in coords[1:-1]:
            if value - kept[-1] > min_gap:
                kept.append(value)
        kept.append(coords[-1])
        return kept

    filtered_h_coords = drop_near_duplicates(h_coords, im_h * 0.05)
    filtered_v_coords = drop_near_duplicates(v_coords, im_w * 0.05)

    print(f"[DEBUG] 过滤后: {len(filtered_h_coords)} 条水平分割线, {len(filtered_v_coords)} 条垂直分割线")

    # 6. Build cells row by row (at most two cells per row).
    panels = []
    min_panel_area = img_area * 0.02
    min_height = im_h * 0.08
    min_width = im_w * 0.15
    validation_border = max(5, int(short_side * 0.005))

    for y1, y2 in zip(filtered_h_coords, filtered_h_coords[1:]):
        row_height = y2 - y1

        # Interior vertical lines count for this row only if the edge map
        # has enough response in a 20px-wide strip around them.
        row_v_coords = [filtered_v_coords[0]]
        for v_x in filtered_v_coords[1:-1]:
            strip = combined_edges[y1:y2, max(0, v_x - 10):min(im_w, v_x + 10)]
            if np.sum(strip > 0) > row_height * 0.2:
                row_v_coords.append(v_x)
        row_v_coords.append(filtered_v_coords[-1])

        # More than one interior divider: keep only the one nearest the
        # middle of the row.
        if len(row_v_coords) > 3:
            left, right = row_v_coords[0], row_v_coords[-1]
            mid = (left + right) / 2
            closest_mid = min(row_v_coords[1:-1], key=lambda x: abs(x - mid))
            row_v_coords = [left, closest_mid, right]

        for x1, x2 in zip(row_v_coords, row_v_coords[1:]):
            w = x2 - x1
            h = row_height
            area = w * h

            if area < min_panel_area or h < min_height or w < min_width:
                continue

            panel_candidate = {
                'x': int(x1),
                'y': int(y1),
                'width': int(w),
                'height': int(h),
                'area': area,
                'center_x': float(x1 + w / 2),
                'center_y': float(y1 + h / 2)
            }

            if validate_panel(gray, panel_candidate, border_width=validation_border):
                panels.append(panel_candidate)

    # Mask: white background (255) with black (0) panel outlines.
    panel_mask = np.ones_like(gray) * 255
    for panel in panels:
        top_left = (panel['x'], panel['y'])
        bottom_right = (panel['x'] + panel['width'], panel['y'] + panel['height'])
        cv2.rectangle(panel_mask, top_left, bottom_right, 0, 4)

    return panel_mask, panels
def detect_panels_from_text_mask(img, text_mask=None):
    """Detect panels from connected regions of a text mask.

    The text mask is dilated so nearby text merges into blobs; each blob's
    bounding box is enlarged by 40% of its size on every side, validated
    with ``validate_panel``, and heavily overlapping boxes are merged.

    Args:
        img: Input image (BGR, or already single-channel).
        text_mask: Mask where text pixels are non-zero (typically 255) and
            everything else is 0. When ``None`` the call is delegated to
            ``detect_comic_panels(img)``.

    Returns:
        (panel_mask, panels): a white mask with panel rectangles drawn in
        black, and a list of panel dicts holding plain Python numbers
        (``x``, ``y``, ``width``, ``height``, ``area``, ``center_x``,
        ``center_y``).
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()

    im_h, im_w = gray.shape
    img_area = im_h * im_w

    # No text mask: fall back to the generic detector.
    if text_mask is None:
        return detect_comic_panels(img)

    # Connected-component analysis needs a single-channel 8-bit image.
    if text_mask.ndim == 3:
        text_mask = text_mask.max(axis=2)
    if text_mask.dtype != np.uint8:
        text_mask = (text_mask > 0).astype(np.uint8) * 255

    # Make the mask the same size as the image.
    if text_mask.shape[:2] != (im_h, im_w):
        text_mask = cv2.resize(text_mask, (im_w, im_h))

    # Dilate and close so nearby text regions join into one blob.
    kernel_size = max(5, int(min(im_w, im_h) * 0.01))
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    dilated_mask = cv2.dilate(text_mask, kernel, iterations=3)
    dilated_mask = cv2.morphologyEx(dilated_mask, cv2.MORPH_CLOSE, kernel, iterations=2)

    num_labels, _labels, stats, _centroids = cv2.connectedComponentsWithStats(
        dilated_mask, 8, cv2.CV_32S
    )

    print(f"[DEBUG] 文字遮罩图连通域分析:找到 {num_labels - 1} 个连通区域(排除背景)")

    min_panel_area = img_area * 0.02  # blob must cover at least 2% of the image
    max_panel_area = img_area * 0.50  # and at most 50%
    border_width = max(10, int(min(im_w, im_h) * 0.01))

    panels = []
    # Label 0 is the background.
    for label_index in range(1, num_labels):
        # Convert numpy scalars to plain ints so the resulting dicts are
        # JSON-serialisable.
        x, y, w, h, area = (int(value) for value in stats[label_index])

        if area < min_panel_area or area > max_panel_area:
            continue

        aspect_ratio = w / h if h > 0 else 0
        if aspect_ratio < 0.15 or aspect_ratio > 6.0:
            continue

        # Grow the box to take in the drawing around the text.
        expand_x = int(w * 0.4)
        expand_y = int(h * 0.4)
        panel_x1 = max(0, x - expand_x)
        panel_y1 = max(0, y - expand_y)
        panel_x2 = min(im_w, x + w + expand_x)
        panel_y2 = min(im_h, y + h + expand_y)
        panel_w = panel_x2 - panel_x1
        panel_h = panel_y2 - panel_y1

        panel_candidate = {
            'x': panel_x1,
            'y': panel_y1,
            'width': panel_w,
            'height': panel_h,
            'area': panel_w * panel_h,
            'center_x': float(panel_x1 + panel_w / 2),
            'center_y': float(panel_y1 + panel_h / 2)
        }

        if validate_panel(gray, panel_candidate, border_width=border_width):
            panels.append(panel_candidate)

    def overlap_area(a, b):
        """Area of the intersection of two panel rectangles (0 if disjoint)."""
        overlap_x = max(0, min(a['x'] + a['width'], b['x'] + b['width']) - max(a['x'], b['x']))
        overlap_y = max(0, min(a['y'] + a['height'], b['y'] + b['height']) - max(a['y'], b['y']))
        return overlap_x * overlap_y

    # Merge each panel into the first kept panel it overlaps by more than
    # half of the smaller one's area; otherwise keep it as a new panel.
    merged_panels = []
    for panel in panels:
        for i, existing in enumerate(merged_panels):
            # Areas are derived from width/height so that panels produced
            # by an earlier merge can be merged again.
            smaller_area = min(panel['width'] * panel['height'],
                               existing['width'] * existing['height'])
            if overlap_area(panel, existing) > smaller_area * 0.5:
                new_x = min(panel['x'], existing['x'])
                new_y = min(panel['y'], existing['y'])
                new_w = max(panel['x'] + panel['width'],
                            existing['x'] + existing['width']) - new_x
                new_h = max(panel['y'] + panel['height'],
                            existing['y'] + existing['height']) - new_y
                merged_panels[i] = {
                    'x': int(new_x),
                    'y': int(new_y),
                    'width': int(new_w),
                    'height': int(new_h),
                    'area': int(new_w * new_h),
                    'center_x': float(new_x + new_w / 2),
                    'center_y': float(new_y + new_h / 2)
                }
                break
        else:
            merged_panels.append(panel)

    # Mask: white background (255) with black (0) panel outlines.
    panel_mask = np.ones_like(gray) * 255
    for panel in merged_panels:
        cv2.rectangle(panel_mask,
                      (panel['x'], panel['y']),
                      (panel['x'] + panel['width'],
                       panel['y'] + panel['height']),
                      0, 4)

    return panel_mask, merged_panels
def detect_panels_from_text_blocks(img, text_blocks=None):
    """Infer panels from the positions of detected text blocks.

    Text blocks whose centres lie close to a seed block are grouped in a
    single pass; each group's bounding box is enlarged by 30% of its size
    on every side, filtered by area and checked with ``validate_panel``.

    Args:
        img: Input image (BGR, or already single-channel).
        text_blocks: Optional list of blocks. Each block is either a
            sequence starting with ``x1, y1, x2, y2`` or a dict with an
            ``'xyxy'`` / ``'bbox'`` entry; the entry may be such a sequence
            or a dict with ``x1``/``y1``/``x2``/``y2`` keys. When the list
            is ``None`` or empty the call is delegated to
            ``detect_comic_panels(img)``.

    Returns:
        (panel_mask, panels): a white mask with panel rectangles drawn in
        black, and the list of panel dicts.
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()

    im_h, im_w = gray.shape
    img_area = im_h * im_w

    # No text blocks: fall back to the generic detector.
    if text_blocks is None or len(text_blocks) == 0:
        return detect_comic_panels(img)

    def get_bbox(block):
        """Return (x1, y1, x2, y2), or all zeros when the block has no usable box."""
        if isinstance(block, dict):
            box = block.get('xyxy', block.get('bbox'))
            if isinstance(box, dict):
                # Dict-form box, e.g. {'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}.
                try:
                    box = [box['x1'], box['y1'], box['x2'], box['y2']]
                except KeyError:
                    box = None
        else:
            box = block
        if box is None or len(box) < 4:
            return (0, 0, 0, 0)
        return tuple(box[:4])

    # Parse every block once instead of re-parsing inside the pair loop.
    bboxes = [get_bbox(block) for block in text_blocks]

    min_panel_area = img_area * 0.02  # at least 2% of the image
    max_panel_area = img_area * 0.50  # at most 50% of the image
    border_width = max(10, int(min(im_w, im_h) * 0.01))

    panels = []
    used_blocks = set()

    for i, (x1, y1, x2, y2) in enumerate(bboxes):
        if i in used_blocks:
            continue
        # Degenerate boxes (including the all-zero placeholder) are skipped.
        if x2 <= x1 or y2 <= y1:
            continue

        cluster_blocks = [i]
        used_blocks.add(i)

        # Neighbours are searched within twice the seed's larger side,
        # but never less than 100 px.
        search_margin = max((x2 - x1) * 2, (y2 - y1) * 2, 100)
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2

        for j, (ox1, oy1, ox2, oy2) in enumerate(bboxes):
            if j in used_blocks:
                continue
            if ox2 <= ox1 or oy2 <= oy1:
                continue

            o_center_x = (ox1 + ox2) / 2
            o_center_y = (oy1 + oy2) / 2
            distance = np.sqrt((center_x - o_center_x) ** 2 + (center_y - o_center_y) ** 2)

            if distance < search_margin:
                cluster_blocks.append(j)
                used_blocks.add(j)

        # Bounding box of the whole group.
        cluster_bboxes = [bboxes[b] for b in cluster_blocks]
        cluster_x1 = min(bbox[0] for bbox in cluster_bboxes)
        cluster_y1 = min(bbox[1] for bbox in cluster_bboxes)
        cluster_x2 = max(bbox[2] for bbox in cluster_bboxes)
        cluster_y2 = max(bbox[3] for bbox in cluster_bboxes)

        # Grow the box to take in the drawing around the text.
        expand_x = int((cluster_x2 - cluster_x1) * 0.3)
        expand_y = int((cluster_y2 - cluster_y1) * 0.3)

        panel_x1 = max(0, int(cluster_x1 - expand_x))
        panel_y1 = max(0, int(cluster_y1 - expand_y))
        panel_x2 = min(im_w, int(cluster_x2 + expand_x))
        panel_y2 = min(im_h, int(cluster_y2 + expand_y))

        panel_w = panel_x2 - panel_x1
        panel_h = panel_y2 - panel_y1
        panel_area = panel_w * panel_h

        if panel_area < min_panel_area or panel_area > max_panel_area:
            continue

        panel_candidate = {
            'x': panel_x1,
            'y': panel_y1,
            'width': panel_w,
            'height': panel_h,
            'area': panel_area,
            'center_x': float(panel_x1 + panel_w / 2),
            'center_y': float(panel_y1 + panel_h / 2)
        }

        if validate_panel(gray, panel_candidate, border_width=border_width):
            panels.append(panel_candidate)

    # Mask: white background (255) with black (0) panel outlines.
    panel_mask = np.ones_like(gray) * 255
    for panel in panels:
        cv2.rectangle(panel_mask,
                      (panel['x'], panel['y']),
                      (panel['x'] + panel['width'],
                       panel['y'] + panel['height']),
                      0, 4)

    return panel_mask, panels
def detect_comic_panels(img, text_blocks=None, text_mask=None):
    """Detect comic panels, trying several strategies in order.

    Order: (1) line grid from white borders, (2) text-mask connected
    regions when ``text_mask`` is given, (3) text-block clustering when
    ``text_blocks`` is given. The first strategy that yields at least four
    panels wins. If none does (or a strategy raises), a contour-based
    fallback on an edge/threshold mask is used.

    Args:
        img: Input image (BGR, or already single-channel).
        text_blocks: Optional list of text blocks.
        text_mask: Optional text mask image.

    Returns:
        (panel_mask, panels): a mask with panel outlines in black (0) on
        white (255), and a list of panel dicts with bounding-box info.
    """
    min_panels = 4  # a strategy's result is accepted from this count on

    # (label, intro message, runner, hint when too few panels, failure suffix)
    strategies = [
        ('白色边界',
         "[DEBUG] 尝试使用基于白色边界的方法...",
         lambda: detect_panels_from_white_borders(img),
         '尝试其他方法',
         ''),
    ]
    if text_mask is not None:
        strategies.append(
            ('文字遮罩图',
             "[DEBUG] 尝试使用基于文字遮罩图的连通域分析方法...",
             lambda: detect_panels_from_text_mask(img, text_mask),
             '尝试其他方法',
             ''))
    if text_blocks is not None and len(text_blocks) > 0:
        strategies.append(
            ('文字块',
             f"[DEBUG] 尝试使用基于文字块的方法,文字块数量: {len(text_blocks)}",
             lambda: detect_panels_from_text_blocks(img, text_blocks),
             '使用传统方法',
             ',使用传统方法'))

    for label, intro, run, retry_hint, failure_suffix in strategies:
        try:
            print(intro)
            panel_mask, panels = run()
            print(f"[DEBUG] 基于{label}的方法检测到 {len(panels)} 个格子")
            if len(panels) >= min_panels:
                print(f"[DEBUG] 使用基于{label}的方法,检测到 {len(panels)} 个格子")
                return panel_mask, panels
            print(f"[DEBUG] 基于{label}的方法检测到的格子数量不足({len(panels)}个),{retry_hint}")
        except Exception as e:
            # Best effort: log the failure and move on to the next strategy.
            print(f"[WARN] 基于{label}的方法失败{failure_suffix}: {e}")
            import traceback
            traceback.print_exc()

    # ---- Fallback: edge detection + contour analysis ----
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()

    im_h, im_w = gray.shape
    img_area = im_h * im_w

    # Step 1: build a permissive mask of possible panel regions.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    adaptive_thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )
    edges = cv2.Canny(blurred, 20, 80, apertureSize=3)
    combined_edges = cv2.bitwise_or(edges, adaptive_thresh)

    # Long horizontal / vertical strokes (kernel length 3% of the side,
    # at least 20 px).
    h_kernel_size = max(int(im_w * 0.03), 20)
    v_kernel_size = max(int(im_h * 0.03), 20)

    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (h_kernel_size, 1))
    horizontal_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, horizontal_kernel)
    horizontal_lines = cv2.dilate(horizontal_lines, horizontal_kernel, iterations=3)

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_kernel_size))
    vertical_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, vertical_kernel)
    vertical_lines = cv2.dilate(vertical_lines, vertical_kernel, iterations=3)

    lines_mask = cv2.bitwise_or(horizontal_lines, vertical_lines)

    # Dilate and close to reconnect broken strokes.
    kernel_size = max(5, int(min(im_w, im_h) * 0.005))
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    dilated = cv2.dilate(lines_mask, kernel, iterations=5)
    dilated = cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, kernel, iterations=3)

    # Add an Otsu segmentation of dark regions in case the stroke mask is sparse.
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    combined_mask = cv2.bitwise_or(dilated, thresh)

    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; contours is second-to-last in both.
    contours = cv2.findContours(
        combined_mask,
        cv2.RETR_TREE,
        cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    print(f"[DEBUG] 找到 {len(contours)} 个轮廓")

    # Step 2: collect candidates with very loose size/shape limits.
    candidate_panels = []
    min_area = img_area * 0.005  # at least 0.5% of the image
    max_area = img_area * 0.95   # at most 95% of the image

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = w * h

        if area < min_area or area > max_area:
            continue

        aspect_ratio = w / h if h > 0 else 0
        if aspect_ratio < 0.1 or aspect_ratio > 10.0:
            continue

        candidate_panels.append({
            'x': int(x),
            'y': int(y),
            'width': int(w),
            'height': int(h),
            'area': area,
            'aspect_ratio': aspect_ratio,
            'center_x': float(x + w / 2),
            'center_y': float(y + h / 2)
        })

    # Largest first, and only the 100 largest are validated.
    candidate_panels.sort(key=lambda p: p['area'], reverse=True)
    candidate_panels = candidate_panels[:100]

    print(f"[DEBUG] 检测到 {len(candidate_panels)} 个候选格子(已按面积排序)")

    # Step 3: validate every candidate.
    valid_panels = []
    border_width = max(10, int(min(im_w, im_h) * 0.01))

    for i, candidate in enumerate(candidate_panels):
        is_valid = validate_panel(gray, candidate, border_width=border_width)
        if is_valid:
            valid_panels.append(candidate)
        # Debug output for the first 10 candidates only.
        if i < 10:
            area_ratio = (candidate['area'] / img_area) * 100
            print(f"[DEBUG] 候选格子 {i+1}: 面积={candidate['area']:.0f} ({area_ratio:.2f}%), "
                  f"尺寸={candidate['width']}x{candidate['height']}, "
                  f"位置=({candidate['x']}, {candidate['y']}), 验证={'通过' if is_valid else '失败'}")

    print(f"[DEBUG] 验证后保留 {len(valid_panels)} 个有效格子")

    def overlap_area(a, b):
        """Area of the intersection of two panel rectangles (0 if disjoint)."""
        overlap_x = max(0, min(a['x'] + a['width'], b['x'] + b['width']) - max(a['x'], b['x']))
        overlap_y = max(0, min(a['y'] + a['height'], b['y'] + b['height']) - max(a['y'], b['y']))
        return overlap_x * overlap_y

    # Step 4: merge each candidate into the first kept panel it overlaps by
    # more than 30% of the smaller area; otherwise keep it as a new panel.
    # (A separate ">50% overlap" rejection is unnecessary: any such overlap
    # already exceeds the 30% merge threshold.)
    panels = []
    for candidate in valid_panels:
        for i, existing in enumerate(panels):
            smaller_area = min(candidate['area'], existing['width'] * existing['height'])
            if overlap_area(candidate, existing) > smaller_area * 0.3:
                new_x = min(candidate['x'], existing['x'])
                new_y = min(candidate['y'], existing['y'])
                new_w = max(candidate['x'] + candidate['width'],
                            existing['x'] + existing['width']) - new_x
                new_h = max(candidate['y'] + candidate['height'],
                            existing['y'] + existing['height']) - new_y
                panels[i] = {
                    'x': int(new_x),
                    'y': int(new_y),
                    'width': int(new_w),
                    'height': int(new_h),
                    'area': int(new_w * new_h),
                    'center_x': float(new_x + new_w / 2),
                    'center_y': float(new_y + new_h / 2)
                }
                break
        else:
            panels.append({
                'x': candidate['x'],
                'y': candidate['y'],
                'width': candidate['width'],
                'height': candidate['height'],
                'area': candidate['area'],
                'center_x': candidate['center_x'],
                'center_y': candidate['center_y']
            })

    # Step 5: mask with white background (255) and black (0) panel outlines.
    panel_mask = np.ones_like(gray) * 255
    for panel in panels:
        cv2.rectangle(panel_mask,
                      (panel['x'], panel['y']),
                      (panel['x'] + panel['width'],
                       panel['y'] + panel['height']),
                      0, 4)

    return panel_mask, panels
def merge_panel_mask_with_text_mask(panel_mask, text_mask):
    """Combine a panel mask and a text mask into one white-on-black mask.

    The panel mask (black lines on white) is inverted so its lines become
    white, then merged with the text mask by taking the per-pixel maximum,
    so both panel lines and text remain bright on a dark background.

    Args:
        panel_mask: Panel mask; lines are 0, everything else 255.
        text_mask: Text mask; text pixels are non-zero (typically 255).
            When ``None``, only the inverted panel mask is returned.

    Returns:
        combined_mask: The merged mask, sized like ``text_mask``.
    """
    if text_mask is None:
        return np.bitwise_not(panel_mask)

    # If one mask has a channel axis and the other does not, collapse the
    # channel axis so the per-pixel maximum below can be computed.
    if panel_mask.ndim != text_mask.ndim:
        if text_mask.ndim == 3:
            # Any non-zero channel marks text.
            text_mask = text_mask.max(axis=2)
        if panel_mask.ndim == 3:
            # Lines are dark, so the minimum keeps them.
            panel_mask = panel_mask.min(axis=2)

    # Only the spatial size decides whether a resize is needed.
    if panel_mask.shape[:2] != text_mask.shape[:2]:
        panel_mask = cv2.resize(panel_mask, (text_mask.shape[1], text_mask.shape[0]))

    # Invert: black lines (0) -> white (255), white background -> black.
    panel_mask_inv = np.bitwise_not(panel_mask)

    # Keep whichever of the two masks is brighter at each pixel.
    combined_mask = np.maximum(panel_mask_inv, text_mask)

    return combined_mask
def _is_tmp_dir(directory):
    """Return True when the last component of *directory* is exactly ``tmp``.

    A plain ``str.endswith('tmp')`` test also matches names such as
    ``my_tmp``; comparing the final path component avoids that.
    """
    return Path(directory).name == 'tmp'


def _normalize_text_blocks(data):
    """Extract text blocks from parsed JSON and convert them to ``{'xyxy': [...]}``.

    *data* may be a list of blocks, or a dict holding the list under
    ``dialogues``, ``text_blocks`` or ``blocks`` (checked in that order).
    Each dict block is converted from either a ``bbox`` mapping with
    ``x1``/``y1``/``x2``/``y2`` keys or an existing ``xyxy`` value.

    Returns:
        The list of normalized blocks; the raw list unchanged when no entry
        carries ``bbox``/``xyxy`` at all (unknown format, left to the
        caller); or ``None`` when no usable list could be found.
    """
    if isinstance(data, list):
        blocks = data
    elif isinstance(data, dict):
        blocks = next(
            (data[key] for key in ('dialogues', 'text_blocks', 'blocks') if key in data),
            None,
        )
    else:
        blocks = None

    if not isinstance(blocks, list):
        return None
    if not blocks:
        return blocks

    formatted = []
    skipped = 0
    for block in blocks:
        if not isinstance(block, dict):
            continue
        if 'bbox' in block:
            bbox = block['bbox']
            try:
                xyxy = [bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2']]
            except (KeyError, TypeError, IndexError):
                # One malformed entry must not discard every valid block.
                skipped += 1
                continue
            formatted.append({'xyxy': xyxy})
        elif 'xyxy' in block:
            formatted.append({'xyxy': block['xyxy']})

    if skipped:
        print(f"[WARN] 跳过 {skipped} 个bbox格式不正确的文字块")
    if formatted:
        return formatted
    # Nothing convertible: hand back the raw list only when no entry was
    # rejected as malformed, otherwise report that nothing usable was found.
    return None if skipped else blocks


def _load_text_blocks(json_path):
    """Read *json_path* and return its normalized text blocks, or ``None`` on failure."""
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both JSON syntax errors and invalid UTF-8.
        print(f"[WARN] 无法读取文字块JSON文件: {e}")
        return None

    blocks = _normalize_text_blocks(data)
    if blocks:
        print(f"[INFO] 从JSON文件读取到 {len(blocks)} 个文字块")
    return blocks


def _read_image(path, flags):
    """Decode the image at *path* with ``cv2.imdecode``; return ``None`` on failure.

    The file bytes are loaded with ``np.fromfile`` and decoded in memory
    because ``cv2.imread`` may fail on Windows for paths containing
    non-ASCII (e.g. Chinese) characters.
    """
    import numpy as np

    try:
        buffer = np.fromfile(str(path), dtype=np.uint8)
        if buffer.size == 0:
            return None
        return cv2.imdecode(buffer, flags)
    except (OSError, cv2.error):
        return None


def _write_png(path, image):
    """Encode *image* as PNG and write it to *path*; return whether encoding succeeded.

    Encoding in memory and writing through ``open`` keeps non-ASCII paths
    working where ``cv2.imwrite`` may not.
    """
    success, encoded = cv2.imencode('.png', image)
    if success:
        with open(str(path), 'wb') as f:
            f.write(encoded.tobytes())
    return success


def main():
    """Command-line entry point: detect comic panels and write the mask files.

    Reads the input image, optionally loads text blocks (JSON) and a text
    mask (explicit path or auto-discovered next to the image / in the output
    directory), runs ``detect_comic_panels`` and writes the panel mask, the
    optional combined mask and the panel JSON into the ``tmp`` directory.
    Exits with status 1 when the input image is missing or unreadable.
    """
    import argparse

    parser = argparse.ArgumentParser(description='检测漫画格子并生成遮罩图')
    parser.add_argument('image', help='输入图片路径')
    parser.add_argument('-o', '--output', help='输出目录')
    parser.add_argument('--text-mask', help='文字mask图片路径(可选,用于合并)')
    parser.add_argument('--text-blocks', help='文字块JSON文件路径(可选,用于辅助格子检测)')

    args = parser.parse_args()

    image_path = Path(args.image)
    if not image_path.exists():
        print(f"[ERROR] 图片文件不存在: {image_path}")
        sys.exit(1)

    # Unicode-safe read first, plain cv2.imread as a last resort.
    img = _read_image(image_path, cv2.IMREAD_COLOR)
    if img is None:
        img = cv2.imread(str(image_path))
    if img is None:
        print(f"[ERROR] 无法读取图片文件: {image_path}")
        print(f"[DEBUG] 尝试使用绝对路径: {image_path.resolve()}")
        sys.exit(1)

    image_name = image_path.stem
    output_dir = Path(args.output) if args.output else image_path.parent
    # If the output directory already is the tmp directory, use it directly;
    # the same resolved directory is used for lookup and for writing.
    tmp_dir = output_dir if _is_tmp_dir(output_dir) else output_dir / 'tmp'

    # Locate the text-block JSON: explicit path, else auto-discovery.
    if args.text_blocks:
        text_blocks_path = Path(args.text_blocks)
        if not text_blocks_path.exists():
            print(f"[WARN] 文字块JSON文件不存在: {text_blocks_path}")
            text_blocks_path = None
    else:
        candidates = [
            tmp_dir / f"{image_name}_dialogues.json",  # tmp directory has priority
            output_dir / f"{image_name}_dialogues.json",
            output_dir / f"{image_name}_text_blocks.json",
            image_path.parent / f"{image_name}_dialogues.json",
            image_path.parent / f"{image_name}_text_blocks.json",
        ]
        text_blocks_path = next((p for p in candidates if p.exists()), None)
        if text_blocks_path is not None:
            print(f"[INFO] 自动找到文字块文件: {text_blocks_path}")

    text_blocks = None
    if text_blocks_path is not None:
        text_blocks = _load_text_blocks(text_blocks_path)

    # Locate the text mask: explicit path, else auto-discovery.
    text_mask = None
    if args.text_mask:
        text_mask_path = Path(args.text_mask)
        if text_mask_path.exists():
            text_mask = _read_image(text_mask_path, cv2.IMREAD_GRAYSCALE)
            if text_mask is not None:
                print(f"[INFO] 从文件读取文字遮罩图: {text_mask_path}")
            else:
                print(f"[WARN] 无法读取文字遮罩图: {text_mask_path}")
    else:
        mask_candidates = [
            tmp_dir / f"{image_name}_text_mask.png",  # tmp directory has priority
            output_dir / f"{image_name}_text_mask.png",
            image_path.parent / f"{image_name}_text_mask.png",
        ]
        for candidate in mask_candidates:
            if not candidate.exists():
                continue
            text_mask = _read_image(candidate, cv2.IMREAD_GRAYSCALE)
            if text_mask is not None:
                print(f"[INFO] 自动找到文字遮罩图: {candidate}")
                break

    # Detect panels.
    print(f"[INFO] 正在检测漫画格子: {image_path.name}")
    panel_mask, panels = detect_comic_panels(img, text_blocks=text_blocks, text_mask=text_mask)
    print(f"[OK] 检测到 {len(panels)} 个格子")

    # Creating tmp_dir with parents=True also creates output_dir when needed.
    tmp_dir.mkdir(parents=True, exist_ok=True)

    # Save the panel mask into the tmp directory (intermediate file).
    panel_mask_path = tmp_dir / f"{image_name}_panel_mask.png"
    if _write_png(panel_mask_path, panel_mask):
        print(f"[OK] 已保存格子遮罩图: {panel_mask_path}")
    else:
        print(f"[ERROR] 保存格子遮罩图失败: {panel_mask_path}")

    # Merge with the text mask only when one was given explicitly.
    if args.text_mask:
        text_mask_path = Path(args.text_mask)
        if not text_mask_path.exists():
            print(f"[WARN] 文字mask图不存在: {text_mask_path}")
        else:
            # Decoded afresh from disk, as the original script did, rather
            # than reusing the array already handed to the detector.
            merge_source = _read_image(text_mask_path, cv2.IMREAD_GRAYSCALE)
            if merge_source is None:
                print(f"[WARN] 无法读取文字mask图: {text_mask_path}")
            else:
                combined_mask = merge_panel_mask_with_text_mask(panel_mask, merge_source)
                combined_mask_path = tmp_dir / f"{image_name}_combined_mask.png"
                if _write_png(combined_mask_path, combined_mask):
                    print(f"[OK] 已保存合并后的mask图: {combined_mask_path}")
                else:
                    print(f"[ERROR] 保存合并后的mask图失败: {combined_mask_path}")

    # Save the panel information JSON into the tmp directory (intermediate file).
    panels_json = {
        'image_file': image_path.name,
        'panels': panels,
        'total_count': len(panels),
    }
    json_path = tmp_dir / f"{image_name}_panels.json"
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(panels_json, f, ensure_ascii=False, indent=2)
    print(f"[OK] 已保存格子信息: {json_path}")


if __name__ == '__main__':
    main()
|