detect_panels.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190
  1. # -*- coding: utf-8 -*-
  2. """
  3. 使用opencv检测漫画格子(分镜框)
  4. """
  5. import sys
  6. import json
  7. from pathlib import Path
  8. import cv2
  9. import numpy as np
  10. # Windows编码修复
  11. if sys.platform == 'win32':
  12. import io
  13. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
  14. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
  15. def validate_panel(gray, panel, border_width=10):
  16. """
  17. 验证格子是否符合要求:内侧有画面,外侧完全空白
  18. 参数:
  19. gray: 灰度图
  20. panel: 格子信息字典,包含 x, y, width, height
  21. border_width: 检查外侧区域的宽度(像素)
  22. 返回:
  23. bool: True表示符合要求,False表示不符合
  24. """
  25. im_h, im_w = gray.shape
  26. x = panel['x']
  27. y = panel['y']
  28. w = panel['width']
  29. h = panel['height']
  30. # 确保坐标在图片范围内
  31. x = max(0, min(im_w - 1, x))
  32. y = max(0, min(im_h - 1, y))
  33. w = min(w, im_w - x)
  34. h = min(h, im_h - y)
  35. if w <= 0 or h <= 0:
  36. return False
  37. # 1. 检查内侧(格子内部)是否有画面内容
  38. # 内侧区域:稍微缩小一点,避免边界影响
  39. inner_margin = max(2, int(min(w, h) * 0.05))
  40. inner_x1 = x + inner_margin
  41. inner_y1 = y + inner_margin
  42. inner_x2 = x + w - inner_margin
  43. inner_y2 = y + h - inner_margin
  44. if inner_x2 <= inner_x1 or inner_y2 <= inner_y1:
  45. return False
  46. # 提取内侧区域
  47. inner_region = gray[inner_y1:inner_y2, inner_x1:inner_x2]
  48. # 计算内侧的平均亮度和标准差
  49. inner_mean = np.mean(inner_region)
  50. inner_std = np.std(inner_region)
  51. # 内侧应该有内容:平均亮度不能太高(< 250),或者标准差要足够大(> 5)
  52. # 放宽条件:如果平均亮度很高且标准差很低,说明是纯白色区域,没有内容
  53. if inner_mean > 250 and inner_std < 5:
  54. return False # 内侧是纯白色,没有内容
  55. # 2. 检查外侧(格子边界外)是否完全空白
  56. # 外侧区域:格子边界外的 border_width 像素宽度
  57. outer_regions = []
  58. # 上侧外侧区域
  59. if y >= border_width:
  60. outer_top = gray[max(0, y - border_width):y, x:min(im_w, x + w)]
  61. if outer_top.size > 0:
  62. outer_regions.append(('top', outer_top))
  63. # 下侧外侧区域
  64. if y + h + border_width <= im_h:
  65. outer_bottom = gray[y + h:min(im_h, y + h + border_width), x:min(im_w, x + w)]
  66. if outer_bottom.size > 0:
  67. outer_regions.append(('bottom', outer_bottom))
  68. # 左侧外侧区域
  69. if x >= border_width:
  70. outer_left = gray[y:min(im_h, y + h), max(0, x - border_width):x]
  71. if outer_left.size > 0:
  72. outer_regions.append(('left', outer_left))
  73. # 右侧外侧区域
  74. if x + w + border_width <= im_w:
  75. outer_right = gray[y:min(im_h, y + h), x + w:min(im_w, x + w + border_width)]
  76. if outer_right.size > 0:
  77. outer_regions.append(('right', outer_right))
  78. # 如果没有任何外侧区域可以检查(格子太靠近边缘),放宽条件:允许通过
  79. if len(outer_regions) == 0:
  80. # 如果格子很大(占图片面积超过5%),可能是主要格子,允许通过
  81. if (w * h) > (im_w * im_h * 0.05):
  82. return True
  83. return False
  84. # 检查所有外侧区域是否都是白色(完全空白)
  85. # 放宽条件:外侧应该是白色:平均亮度 > 200,标准差 < 30
  86. # 至少有一半的外侧区域是白色即可
  87. white_count = 0
  88. for side_name, outer_region in outer_regions:
  89. if outer_region.size == 0:
  90. continue
  91. outer_mean = np.mean(outer_region)
  92. outer_std = np.std(outer_region)
  93. # 如果平均亮度足够高且标准差足够小,认为是白色区域
  94. if outer_mean > 200 and outer_std < 30:
  95. white_count += 1
  96. # 至少有一半的外侧区域是白色,就认为符合要求
  97. if white_count >= len(outer_regions) * 0.5:
  98. return True
  99. # 如果格子很大(占图片面积超过2%),即使外侧不完全符合,也允许通过(可能是主要格子)
  100. if (w * h) > (im_w * im_h * 0.02):
  101. return True
  102. return False
  103. def detect_panels_from_white_borders(img):
  104. """
  105. 基于漫画最外面区域一定是白色的特点检测格子
  106. 策略:
  107. 1. 识别图片边缘的白色区域
  108. 2. 一行最多两个格子,先识别每个格子的两条边界线
  109. 3. 找与这两条线成90°的直线段,组成完整的格子
  110. 参数:
  111. img: 输入图片(BGR格式)
  112. 返回:
  113. panel_mask: 格子遮罩图
  114. panels: 格子列表
  115. """
  116. if len(img.shape) == 3:
  117. gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  118. else:
  119. gray = img.copy()
  120. im_h, im_w = gray.shape
  121. img_area = im_h * im_w
  122. # 1. 识别图片边缘的白色区域
  123. # 检查四个边缘区域(上、下、左、右)是否为白色
  124. border_width = max(10, int(min(im_w, im_h) * 0.02))
  125. # 上边缘
  126. top_region = gray[0:border_width, :]
  127. top_white = np.mean(top_region) > 240
  128. # 下边缘
  129. bottom_region = gray[im_h-border_width:im_h, :]
  130. bottom_white = np.mean(bottom_region) > 240
  131. # 左边缘
  132. left_region = gray[:, 0:border_width]
  133. left_white = np.mean(left_region) > 240
  134. # 右边缘
  135. right_region = gray[:, im_w-border_width:im_w]
  136. right_white = np.mean(right_region) > 240
  137. print(f"[DEBUG] 边缘白色检测: 上={top_white}, 下={bottom_white}, 左={left_white}, 右={right_white}")
  138. # 2. 检测水平和垂直线(格子边界线)
  139. blurred = cv2.GaussianBlur(gray, (5, 5), 0)
  140. # 使用自适应阈值
  141. adaptive_thresh = cv2.adaptiveThreshold(
  142. blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
  143. cv2.THRESH_BINARY_INV, 11, 2
  144. )
  145. # Canny边缘检测
  146. edges = cv2.Canny(blurred, 30, 100, apertureSize=3)
  147. # 合并
  148. combined_edges = cv2.bitwise_or(edges, adaptive_thresh)
  149. # 检测水平线(用于分割行)
  150. h_kernel_size = max(int(im_w * 0.05), 30)
  151. horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (h_kernel_size, 1))
  152. horizontal_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, horizontal_kernel)
  153. horizontal_lines = cv2.dilate(horizontal_lines, horizontal_kernel, iterations=2)
  154. # 检测垂直线(用于分割列,一行最多两个格子)
  155. v_kernel_size = max(int(im_h * 0.05), 30)
  156. vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_kernel_size))
  157. vertical_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, vertical_kernel)
  158. vertical_lines = cv2.dilate(vertical_lines, vertical_kernel, iterations=2)
  159. # 3. 使用霍夫直线检测,找到主要的水平和垂直线
  160. min_line_length = max(int(min(im_w, im_h) * 0.15), 50) # 增加最小长度
  161. # 检测水平线(提高阈值,只检测主要的水平分割线)
  162. h_lines = cv2.HoughLinesP(
  163. horizontal_lines,
  164. 1,
  165. np.pi/180,
  166. threshold=max(int(min(im_w, im_h) * 0.25), 80), # 提高阈值
  167. minLineLength=min_line_length,
  168. maxLineGap=max(int(min(im_w, im_h) * 0.03), 10)
  169. )
  170. # 检测垂直线(提高阈值,只检测主要的垂直分割线)
  171. v_lines = cv2.HoughLinesP(
  172. vertical_lines,
  173. 1,
  174. np.pi/180,
  175. threshold=max(int(min(im_w, im_h) * 0.25), 80), # 提高阈值
  176. minLineLength=min_line_length,
  177. maxLineGap=max(int(min(im_w, im_h) * 0.03), 10)
  178. )
  179. # 4. 合并相近的水平线和垂直线,得到主要的格子分割线
  180. def merge_lines(lines, is_horizontal=True):
  181. """合并相近的直线,并过滤掉太短的线"""
  182. if lines is None or len(lines) == 0:
  183. return []
  184. merged = []
  185. used = set()
  186. # 合并阈值:根据图片尺寸调整
  187. merge_threshold_h = max(im_h * 0.03, 30) # 水平线合并阈值
  188. merge_threshold_v = max(im_w * 0.03, 30) # 垂直线合并阈值
  189. for i, line in enumerate(lines):
  190. if i in used:
  191. continue
  192. x1, y1, x2, y2 = line[0]
  193. if is_horizontal:
  194. # 水平线:合并Y坐标相近的线
  195. y = (y1 + y2) / 2
  196. similar_lines = [line]
  197. used.add(i)
  198. for j, other_line in enumerate(lines[i+1:], i+1):
  199. if j in used:
  200. continue
  201. ox1, oy1, ox2, oy2 = other_line[0]
  202. oy = (oy1 + oy2) / 2
  203. # 如果Y坐标相近(在阈值内),合并
  204. if abs(y - oy) < merge_threshold_h:
  205. similar_lines.append(other_line)
  206. used.add(j)
  207. # 计算合并后的线的平均Y坐标和X范围
  208. avg_y = int(np.mean([(l[0][1] + l[0][3]) / 2 for l in similar_lines]))
  209. min_x = int(min([min(l[0][0], l[0][2]) for l in similar_lines]))
  210. max_x = int(max([max(l[0][0], l[0][2]) for l in similar_lines]))
  211. # 过滤:水平线必须跨越至少50%的图片宽度
  212. if (max_x - min_x) > im_w * 0.5:
  213. merged.append((min_x, avg_y, max_x, avg_y))
  214. else:
  215. # 垂直线:合并X坐标相近的线
  216. x = (x1 + x2) / 2
  217. similar_lines = [line]
  218. used.add(i)
  219. for j, other_line in enumerate(lines[i+1:], i+1):
  220. if j in used:
  221. continue
  222. ox1, oy1, ox2, oy2 = other_line[0]
  223. ox = (ox1 + ox2) / 2
  224. # 如果X坐标相近(在阈值内),合并
  225. if abs(x - ox) < merge_threshold_v:
  226. similar_lines.append(other_line)
  227. used.add(j)
  228. # 计算合并后的线的平均X坐标和Y范围
  229. avg_x = int(np.mean([(l[0][0] + l[0][2]) / 2 for l in similar_lines]))
  230. min_y = int(min([min(l[0][1], l[0][3]) for l in similar_lines]))
  231. max_y = int(max([max(l[0][1], l[0][3]) for l in similar_lines]))
  232. # 过滤:垂直线必须跨越至少50%的图片高度
  233. if (max_y - min_y) > im_h * 0.5:
  234. merged.append((avg_x, min_y, avg_x, max_y))
  235. return merged
  236. h_merged = merge_lines(h_lines, is_horizontal=True)
  237. v_merged = merge_lines(v_lines, is_horizontal=False)
  238. print(f"[DEBUG] 检测到 {len(h_merged)} 条水平线和 {len(v_merged)} 条垂直线")
  239. # 5. 根据水平和垂直线构建格子
  240. # 添加图片边界线
  241. h_coords = [0] # 上边界
  242. for line in h_merged:
  243. y = line[1]
  244. if 0 < y < im_h:
  245. h_coords.append(y)
  246. h_coords.append(im_h) # 下边界
  247. h_coords = sorted(set(h_coords))
  248. v_coords = [0] # 左边界
  249. for line in v_merged:
  250. x = line[0]
  251. if 0 < x < im_w:
  252. v_coords.append(x)
  253. v_coords.append(im_w) # 右边界
  254. v_coords = sorted(set(v_coords))
  255. print(f"[DEBUG] 水平分割线Y坐标: {h_coords}")
  256. print(f"[DEBUG] 垂直分割线X坐标: {v_coords}")
  257. # 6. 构建格子(每行最多两个格子)
  258. panels = []
  259. min_panel_area = img_area * 0.02
  260. # 过滤掉太近的水平线(可能是重复检测)
  261. filtered_h_coords = [h_coords[0]]
  262. for i in range(1, len(h_coords) - 1):
  263. if h_coords[i] - filtered_h_coords[-1] > im_h * 0.05: # 至少间隔5%的高度
  264. filtered_h_coords.append(h_coords[i])
  265. filtered_h_coords.append(h_coords[-1])
  266. # 过滤掉太近的垂直线(可能是重复检测)
  267. filtered_v_coords = [v_coords[0]]
  268. for i in range(1, len(v_coords) - 1):
  269. if v_coords[i] - filtered_v_coords[-1] > im_w * 0.05: # 至少间隔5%的宽度
  270. filtered_v_coords.append(v_coords[i])
  271. filtered_v_coords.append(v_coords[-1])
  272. print(f"[DEBUG] 过滤后: {len(filtered_h_coords)} 条水平分割线, {len(filtered_v_coords)} 条垂直分割线")
  273. # 根据过滤后的坐标构建格子
  274. for i in range(len(filtered_h_coords) - 1):
  275. y1 = filtered_h_coords[i]
  276. y2 = filtered_h_coords[i + 1]
  277. # 一行最多两个格子
  278. # 找到在这个行范围内的垂直分割线
  279. row_v_coords = [filtered_v_coords[0]] # 行的左边界
  280. for v_x in filtered_v_coords[1:-1]: # 排除左右边界
  281. # 检查这条垂直线是否与当前行相交
  282. # 检查垂直线附近是否有足够的边缘响应
  283. line_region = combined_edges[y1:y2, max(0, v_x-10):min(im_w, v_x+10)]
  284. if np.sum(line_region > 0) > (y2 - y1) * 0.2: # 至少20%的区域有边缘
  285. row_v_coords.append(v_x)
  286. row_v_coords.append(filtered_v_coords[-1]) # 行的右边界
  287. # 如果一行有太多垂直分割线,只保留主要的(每行最多2个格子,所以最多3条垂直线:左、中、右)
  288. if len(row_v_coords) > 3:
  289. # 选择最靠近左、中、右位置的线
  290. left = row_v_coords[0]
  291. right = row_v_coords[-1]
  292. mid = (left + right) / 2
  293. # 找到最接近中间位置的垂直线
  294. closest_mid = min(row_v_coords[1:-1], key=lambda x: abs(x - mid))
  295. row_v_coords = [left, closest_mid, right]
  296. # 根据垂直分割线创建格子(每行最多两个格子)
  297. for j in range(len(row_v_coords) - 1):
  298. x1 = row_v_coords[j]
  299. x2 = row_v_coords[j + 1]
  300. # 基本过滤
  301. w = x2 - x1
  302. h = y2 - y1
  303. area = w * h
  304. # 过滤:面积太小或高度/宽度太小
  305. min_height = im_h * 0.08 # 至少占图片高度的8%
  306. min_width = im_w * 0.15 # 至少占图片宽度的15%
  307. if area < min_panel_area or h < min_height or w < min_width:
  308. continue
  309. # 验证格子:内侧有内容,外侧是白色
  310. panel_candidate = {
  311. 'x': int(x1),
  312. 'y': int(y1),
  313. 'width': int(w),
  314. 'height': int(h),
  315. 'area': area,
  316. 'center_x': float(x1 + w / 2),
  317. 'center_y': float(y1 + h / 2)
  318. }
  319. # 放宽验证条件,因为我们已经基于线条构建了格子
  320. if validate_panel(gray, panel_candidate, border_width=max(5, int(min(im_w, im_h) * 0.005))):
  321. panels.append(panel_candidate)
  322. # 绘制遮罩图(黑线白底:背景为白色255,格子线为黑色0)
  323. panel_mask = np.ones_like(gray) * 255 # 创建全白的mask
  324. for panel in panels:
  325. cv2.rectangle(panel_mask,
  326. (panel['x'], panel['y']),
  327. (panel['x'] + panel['width'],
  328. panel['y'] + panel['height']),
  329. 0, 4) # 绘制黑色的格子线
  330. return panel_mask, panels
  331. def detect_panels_from_text_mask(img, text_mask=None):
  332. """
  333. 基于文字遮罩图的连通域分析检测格子
  334. 使用文字遮罩图找到包含文字的大连通区域,这些区域很可能就是格子
  335. 参数:
  336. img: 输入图片(BGR格式)
  337. text_mask: 文字遮罩图(灰度图,文字区域为白色255,其他为黑色0)
  338. 返回:
  339. panel_mask: 格子遮罩图
  340. panels: 格子列表
  341. """
  342. if len(img.shape) == 3:
  343. gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  344. else:
  345. gray = img.copy()
  346. im_h, im_w = gray.shape
  347. img_area = im_h * im_w
  348. # 如果没有提供文字遮罩图,使用传统方法
  349. if text_mask is None:
  350. return detect_comic_panels(img)
  351. # 确保文字遮罩图尺寸匹配
  352. if text_mask.shape[:2] != (im_h, im_w):
  353. text_mask = cv2.resize(text_mask, (im_w, im_h))
  354. # 方法:基于文字遮罩图的连通域分析
  355. # 1. 对文字遮罩图进行膨胀,连接相近的文字区域
  356. # 2. 使用连通域分析找到包含文字的大区域
  357. # 3. 扩展这些区域以包含周围的空白区域
  358. # 4. 验证每个区域是否符合格子特征
  359. # 对文字遮罩图进行膨胀,连接相近的文字区域
  360. kernel_size = max(5, int(min(im_w, im_h) * 0.01))
  361. kernel = np.ones((kernel_size, kernel_size), np.uint8)
  362. dilated_mask = cv2.dilate(text_mask, kernel, iterations=3)
  363. dilated_mask = cv2.morphologyEx(dilated_mask, cv2.MORPH_CLOSE, kernel, iterations=2)
  364. # 使用连通域分析找到包含文字的大区域
  365. connectivity = 8
  366. num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
  367. dilated_mask, connectivity, cv2.CV_32S
  368. )
  369. print(f"[DEBUG] 文字遮罩图连通域分析:找到 {num_labels - 1} 个连通区域(排除背景)")
  370. panels = []
  371. min_panel_area = img_area * 0.02 # 最小格子面积(2%)
  372. max_panel_area = img_area * 0.50 # 最大格子面积(50%)
  373. # 遍历所有连通区域(跳过背景标签0)
  374. for label_index in range(1, num_labels):
  375. stat = stats[label_index]
  376. x, y, w, h, area = stat
  377. # 基本过滤:面积太小或太大
  378. if area < min_panel_area or area > max_panel_area:
  379. continue
  380. # 计算宽高比
  381. aspect_ratio = w / h if h > 0 else 0
  382. if aspect_ratio < 0.15 or aspect_ratio > 6.0:
  383. continue
  384. # 扩展边界框以包含周围的空白区域
  385. # 扩展比例:向四周扩展文字区域尺寸的 30-50%
  386. expand_x = int(w * 0.4)
  387. expand_y = int(h * 0.4)
  388. panel_x1 = max(0, x - expand_x)
  389. panel_y1 = max(0, y - expand_y)
  390. panel_x2 = min(im_w, x + w + expand_x)
  391. panel_y2 = min(im_h, y + h + expand_y)
  392. panel_w = panel_x2 - panel_x1
  393. panel_h = panel_y2 - panel_y1
  394. panel_area = panel_w * panel_h
  395. # 验证格子
  396. panel_candidate = {
  397. 'x': panel_x1,
  398. 'y': panel_y1,
  399. 'width': panel_w,
  400. 'height': panel_h,
  401. 'area': panel_area,
  402. 'center_x': float(panel_x1 + panel_w / 2),
  403. 'center_y': float(panel_y1 + panel_h / 2)
  404. }
  405. if validate_panel(gray, panel_candidate, border_width=max(10, int(min(im_w, im_h) * 0.01))):
  406. panels.append(panel_candidate)
  407. # 合并重叠的格子
  408. merged_panels = []
  409. for panel in panels:
  410. merged = False
  411. for i, existing in enumerate(merged_panels):
  412. # 计算重叠
  413. overlap_x = max(0, min(panel['x'] + panel['width'],
  414. existing['x'] + existing['width']) -
  415. max(panel['x'], existing['x']))
  416. overlap_y = max(0, min(panel['y'] + panel['height'],
  417. existing['y'] + existing['height']) -
  418. max(panel['y'], existing['y']))
  419. overlap_area = overlap_x * overlap_y
  420. # 如果重叠面积超过较小格子的50%,合并它们
  421. min_area_for_merge = min(panel['area'], existing['area'])
  422. if overlap_area > min_area_for_merge * 0.5:
  423. # 合并:取两个格子的并集
  424. new_x = min(panel['x'], existing['x'])
  425. new_y = min(panel['y'], existing['y'])
  426. new_w = max(panel['x'] + panel['width'],
  427. existing['x'] + existing['width']) - new_x
  428. new_h = max(panel['y'] + panel['height'],
  429. existing['y'] + existing['height']) - new_y
  430. merged_panels[i] = {
  431. 'x': int(new_x),
  432. 'y': int(new_y),
  433. 'width': int(new_w),
  434. 'height': int(new_h),
  435. 'center_x': float(new_x + new_w / 2),
  436. 'center_y': float(new_y + new_h / 2)
  437. }
  438. merged = True
  439. break
  440. if not merged:
  441. merged_panels.append(panel)
  442. # 绘制遮罩图(黑线白底:背景为白色255,格子线为黑色0)
  443. panel_mask = np.ones_like(gray) * 255 # 创建全白的mask
  444. for panel in merged_panels:
  445. cv2.rectangle(panel_mask,
  446. (panel['x'], panel['y']),
  447. (panel['x'] + panel['width'],
  448. panel['y'] + panel['height']),
  449. 0, 4) # 绘制黑色的格子线
  450. return panel_mask, merged_panels
  451. def detect_panels_from_text_blocks(img, text_blocks=None):
  452. """
  453. 基于文字块位置检测格子
  454. 如果提供了文字块列表,使用它们来推断格子边界
  455. 参数:
  456. img: 输入图片(BGR格式)
  457. text_blocks: 文字块列表(可选),每个元素包含 xyxy 坐标 [x1, y1, x2, y2]
  458. 返回:
  459. panel_mask: 格子遮罩图
  460. panels: 格子列表
  461. """
  462. if len(img.shape) == 3:
  463. gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  464. else:
  465. gray = img.copy()
  466. im_h, im_w = gray.shape
  467. img_area = im_h * im_w
  468. # 如果没有提供文字块,使用传统方法
  469. if text_blocks is None or len(text_blocks) == 0:
  470. return detect_comic_panels(img)
  471. # 方法:基于文字块聚类和区域扩展
  472. # 1. 根据文字块位置进行聚类(使用简单的距离阈值)
  473. # 2. 对每个聚类,扩展边界框以包含周围的空白区域
  474. # 3. 验证扩展后的区域是否符合格子特征
  475. panels = []
  476. used_blocks = set()
  477. # 计算文字块之间的距离,进行聚类
  478. min_panel_area = img_area * 0.02 # 最小格子面积
  479. max_panel_area = img_area * 0.50 # 最大格子面积
  480. for i, block in enumerate(text_blocks):
  481. if i in used_blocks:
  482. continue
  483. # 获取文字块的边界框
  484. if isinstance(block, dict):
  485. x1, y1, x2, y2 = block.get('xyxy', block.get('bbox', [0, 0, 0, 0]))
  486. else:
  487. x1, y1, x2, y2 = block[:4] if len(block) >= 4 else [0, 0, 0, 0]
  488. if x2 <= x1 or y2 <= y1:
  489. continue
  490. # 找到与当前文字块相邻的其他文字块(聚类)
  491. cluster_blocks = [i]
  492. used_blocks.add(i)
  493. # 扩展搜索范围:查找附近的文字块
  494. search_margin = max((x2 - x1) * 2, (y2 - y1) * 2, 100)
  495. for j, other_block in enumerate(text_blocks):
  496. if j in used_blocks or j == i:
  497. continue
  498. if isinstance(other_block, dict):
  499. ox1, oy1, ox2, oy2 = other_block.get('xyxy', other_block.get('bbox', [0, 0, 0, 0]))
  500. else:
  501. ox1, oy1, ox2, oy2 = other_block[:4] if len(other_block) >= 4 else [0, 0, 0, 0]
  502. if ox2 <= ox1 or oy2 <= oy1:
  503. continue
  504. # 计算两个文字块的距离
  505. center_x = (x1 + x2) / 2
  506. center_y = (y1 + y2) / 2
  507. o_center_x = (ox1 + ox2) / 2
  508. o_center_y = (oy1 + oy2) / 2
  509. distance = np.sqrt((center_x - o_center_x)**2 + (center_y - o_center_y)**2)
  510. # 如果距离在搜索范围内,加入聚类
  511. if distance < search_margin:
  512. cluster_blocks.append(j)
  513. used_blocks.add(j)
  514. # 计算聚类的边界框
  515. def get_bbox(block):
  516. if isinstance(block, dict):
  517. return block.get('xyxy', block.get('bbox', [0, 0, 0, 0]))
  518. else:
  519. return block[:4] if len(block) >= 4 else [0, 0, 0, 0]
  520. cluster_bboxes = [get_bbox(text_blocks[b]) for b in cluster_blocks]
  521. cluster_x1 = min([bbox[0] for bbox in cluster_bboxes])
  522. cluster_y1 = min([bbox[1] for bbox in cluster_bboxes])
  523. cluster_x2 = max([bbox[2] for bbox in cluster_bboxes])
  524. cluster_y2 = max([bbox[3] for bbox in cluster_bboxes])
  525. # 扩展边界框以包含周围的空白区域
  526. # 扩展比例:向四周扩展文字块尺寸的 20-50%
  527. expand_x = int((cluster_x2 - cluster_x1) * 0.3)
  528. expand_y = int((cluster_y2 - cluster_y1) * 0.3)
  529. panel_x1 = max(0, int(cluster_x1 - expand_x))
  530. panel_y1 = max(0, int(cluster_y1 - expand_y))
  531. panel_x2 = min(im_w, int(cluster_x2 + expand_x))
  532. panel_y2 = min(im_h, int(cluster_y2 + expand_y))
  533. panel_w = panel_x2 - panel_x1
  534. panel_h = panel_y2 - panel_y1
  535. panel_area = panel_w * panel_h
  536. # 基本过滤
  537. if panel_area < min_panel_area or panel_area > max_panel_area:
  538. continue
  539. # 验证格子
  540. panel_candidate = {
  541. 'x': panel_x1,
  542. 'y': panel_y1,
  543. 'width': panel_w,
  544. 'height': panel_h,
  545. 'area': panel_area,
  546. 'center_x': float(panel_x1 + panel_w / 2),
  547. 'center_y': float(panel_y1 + panel_h / 2)
  548. }
  549. if validate_panel(gray, panel_candidate, border_width=max(10, int(min(im_w, im_h) * 0.01))):
  550. panels.append(panel_candidate)
  551. # 绘制遮罩图(黑线白底:背景为白色255,格子线为黑色0)
  552. panel_mask = np.ones_like(gray) * 255 # 创建全白的mask
  553. for panel in panels:
  554. cv2.rectangle(panel_mask,
  555. (panel['x'], panel['y']),
  556. (panel['x'] + panel['width'],
  557. panel['y'] + panel['height']),
  558. 0, 4) # 绘制黑色的格子线
  559. return panel_mask, panels
  560. def detect_comic_panels(img, text_blocks=None, text_mask=None):
  561. """
  562. 使用opencv检测漫画格子(分镜框)- 改进版
  563. 策略:优先使用基于白色边界的方法,其次使用文字遮罩图,最后使用传统方法
  564. 参数:
  565. img: 输入图片(BGR格式)
  566. text_blocks: 文字块列表(可选),来自 comic-text-detector
  567. text_mask: 文字遮罩图(可选),来自 comic-text-detector
  568. 返回:
  569. panel_mask: 格子遮罩图(灰度图,格子线为黑色0,其他为白色255)
  570. panels: 格子列表,每个格子包含边界框信息
  571. """
  572. # 优先使用基于白色边界的方法(利用漫画边缘一定是白色的特点)
  573. try:
  574. print(f"[DEBUG] 尝试使用基于白色边界的方法...")
  575. panel_mask, panels = detect_panels_from_white_borders(img)
  576. print(f"[DEBUG] 基于白色边界的方法检测到 {len(panels)} 个格子")
  577. if len(panels) >= 4: # 如果检测到足够多的格子,使用这个方法
  578. print(f"[DEBUG] 使用基于白色边界的方法,检测到 {len(panels)} 个格子")
  579. return panel_mask, panels
  580. else:
  581. print(f"[DEBUG] 基于白色边界的方法检测到的格子数量不足({len(panels)}个),尝试其他方法")
  582. except Exception as e:
  583. print(f"[WARN] 基于白色边界的方法失败: {e}")
  584. import traceback
  585. traceback.print_exc()
  586. # 其次使用基于文字遮罩图的方法
  587. if text_mask is not None:
  588. try:
  589. print(f"[DEBUG] 尝试使用基于文字遮罩图的连通域分析方法...")
  590. panel_mask, panels = detect_panels_from_text_mask(img, text_mask)
  591. print(f"[DEBUG] 基于文字遮罩图的方法检测到 {len(panels)} 个格子")
  592. if len(panels) >= 4: # 如果检测到足够多的格子,使用这个方法
  593. print(f"[DEBUG] 使用基于文字遮罩图的方法,检测到 {len(panels)} 个格子")
  594. return panel_mask, panels
  595. else:
  596. print(f"[DEBUG] 基于文字遮罩图的方法检测到的格子数量不足({len(panels)}个),尝试其他方法")
  597. except Exception as e:
  598. print(f"[WARN] 基于文字遮罩图的方法失败: {e}")
  599. import traceback
  600. traceback.print_exc()
  601. # 再次使用基于文字块的方法
  602. if text_blocks is not None and len(text_blocks) > 0:
  603. try:
  604. print(f"[DEBUG] 尝试使用基于文字块的方法,文字块数量: {len(text_blocks)}")
  605. panel_mask, panels = detect_panels_from_text_blocks(img, text_blocks)
  606. print(f"[DEBUG] 基于文字块的方法检测到 {len(panels)} 个格子")
  607. if len(panels) >= 4: # 如果检测到足够多的格子,使用这个方法
  608. print(f"[DEBUG] 使用基于文字块的方法,检测到 {len(panels)} 个格子")
  609. return panel_mask, panels
  610. else:
  611. print(f"[DEBUG] 基于文字块的方法检测到的格子数量不足({len(panels)}个),使用传统方法")
  612. except Exception as e:
  613. print(f"[WARN] 基于文字块的方法失败,使用传统方法: {e}")
  614. import traceback
  615. traceback.print_exc()
  616. # 传统方法:基于边缘检测和轮廓分析
  617. # 转换为灰度图
  618. if len(img.shape) == 3:
  619. gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  620. else:
  621. gray = img.copy()
  622. im_h, im_w = gray.shape
  623. img_area = im_h * im_w
  624. # 第一步:放宽条件,检测所有可能的候选格子
  625. # 方法1: 改进的边缘检测(更宽松)
  626. blurred = cv2.GaussianBlur(gray, (5, 5), 0)
  627. # 使用自适应阈值(更敏感)
  628. adaptive_thresh = cv2.adaptiveThreshold(
  629. blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
  630. cv2.THRESH_BINARY_INV, 11, 2
  631. )
  632. # Canny边缘检测(更敏感的参数)
  633. edges = cv2.Canny(blurred, 20, 80, apertureSize=3)
  634. # 合并自适应阈值和Canny边缘
  635. combined_edges = cv2.bitwise_or(edges, adaptive_thresh)
  636. # 方法2: 检测水平线和垂直线(更宽松的kernel)
  637. h_kernel_size = max(int(im_w * 0.03), 20) # 水平线kernel宽度(更小,更敏感)
  638. v_kernel_size = max(int(im_h * 0.03), 20) # 垂直线kernel高度(更小,更敏感)
  639. # 检测水平线
  640. horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (h_kernel_size, 1))
  641. horizontal_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, horizontal_kernel)
  642. horizontal_lines = cv2.dilate(horizontal_lines, horizontal_kernel, iterations=3) # 增加迭代次数
  643. # 检测垂直线
  644. vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_kernel_size))
  645. vertical_lines = cv2.morphologyEx(combined_edges, cv2.MORPH_OPEN, vertical_kernel)
  646. vertical_lines = cv2.dilate(vertical_lines, vertical_kernel, iterations=3) # 增加迭代次数
  647. # 合并水平和垂直线
  648. lines_mask = cv2.bitwise_or(horizontal_lines, vertical_lines)
  649. # 对边缘进行膨胀,连接断开的线条(更激进)
  650. kernel_size = max(5, int(min(im_w, im_h) * 0.005))
  651. kernel = np.ones((kernel_size, kernel_size), np.uint8)
  652. dilated = cv2.dilate(lines_mask, kernel, iterations=5) # 增加迭代次数
  653. dilated = cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, kernel, iterations=3) # 增加迭代次数
  654. # 如果线条mask太稀疏,尝试使用更直接的方法:基于灰度值的分割
  655. # 使用阈值分割,找到可能的格子区域
  656. _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
  657. # 合并线条mask和阈值分割结果
  658. combined_mask = cv2.bitwise_or(dilated, thresh)
  659. # 查找轮廓(使用RETR_TREE获取所有轮廓,包括嵌套的)
  660. contours, hierarchy = cv2.findContours(
  661. combined_mask,
  662. cv2.RETR_TREE,
  663. cv2.CHAIN_APPROX_SIMPLE
  664. )
  665. print(f"[DEBUG] 找到 {len(contours)} 个轮廓")
  666. # 第二步:放宽条件,收集所有候选格子
  667. candidate_panels = []
  668. min_area = img_area * 0.005 # 非常宽松:至少占图片0.5%的面积
  669. max_area = img_area * 0.95 # 最大不超过95%
  670. for contour in contours:
  671. # 计算轮廓的边界框
  672. x, y, w, h = cv2.boundingRect(contour)
  673. area = w * h
  674. # 基本过滤(非常宽松)
  675. if area < min_area or area > max_area:
  676. continue
  677. # 计算宽高比(非常宽松的范围)
  678. aspect_ratio = w / h if h > 0 else 0
  679. if aspect_ratio < 0.1 or aspect_ratio > 10.0:
  680. continue
  681. candidate_panels.append({
  682. 'x': int(x),
  683. 'y': int(y),
  684. 'width': int(w),
  685. 'height': int(h),
  686. 'area': area,
  687. 'aspect_ratio': aspect_ratio,
  688. 'center_x': float(x + w / 2),
  689. 'center_y': float(y + h / 2)
  690. })
  691. # 按面积排序,优先选择较大的区域(更可能是主要格子)
  692. candidate_panels.sort(key=lambda p: p['area'], reverse=True)
  693. # 如果候选格子太多,只保留前100个最大的(避免验证太多小区域)
  694. if len(candidate_panels) > 100:
  695. candidate_panels = candidate_panels[:100]
  696. print(f"[DEBUG] 检测到 {len(candidate_panels)} 个候选格子(已按面积排序)")
  697. # 第三步:验证每个候选格子是否符合要求(内侧有画面,外侧完全空白)
  698. valid_panels = []
  699. border_width = max(10, int(min(im_w, im_h) * 0.01))
  700. for i, candidate in enumerate(candidate_panels):
  701. is_valid = validate_panel(gray, candidate, border_width=border_width)
  702. if is_valid:
  703. valid_panels.append(candidate)
  704. # 只对前10个候选格子输出调试信息
  705. if i < 10:
  706. area_ratio = (candidate['area'] / img_area) * 100
  707. print(f"[DEBUG] 候选格子 {i+1}: 面积={candidate['area']:.0f} ({area_ratio:.2f}%), "
  708. f"尺寸={candidate['width']}x{candidate['height']}, "
  709. f"位置=({candidate['x']}, {candidate['y']}), 验证={'通过' if is_valid else '失败'}")
  710. print(f"[DEBUG] 验证后保留 {len(valid_panels)} 个有效格子")
  711. # 第四步:合并重叠和相邻的格子
  712. panels = []
  713. for candidate in valid_panels:
  714. merged = False
  715. # 检查是否可以与已有格子合并
  716. for i, existing in enumerate(panels):
  717. # 计算重叠或相邻关系
  718. overlap_x = max(0, min(candidate['x'] + candidate['width'],
  719. existing['x'] + existing['width']) -
  720. max(candidate['x'], existing['x']))
  721. overlap_y = max(0, min(candidate['y'] + candidate['height'],
  722. existing['y'] + existing['height']) -
  723. max(candidate['y'], existing['y']))
  724. overlap_area = overlap_x * overlap_y
  725. # 计算已有格子的面积
  726. existing_area = existing['width'] * existing['height']
  727. # 如果重叠面积超过较小格子的30%,合并它们
  728. min_area_for_merge = min(candidate['area'], existing_area)
  729. if overlap_area > min_area_for_merge * 0.3:
  730. # 合并:取两个格子的并集
  731. new_x = min(candidate['x'], existing['x'])
  732. new_y = min(candidate['y'], existing['y'])
  733. new_w = max(candidate['x'] + candidate['width'],
  734. existing['x'] + existing['width']) - new_x
  735. new_h = max(candidate['y'] + candidate['height'],
  736. existing['y'] + existing['height']) - new_y
  737. panels[i] = {
  738. 'x': int(new_x),
  739. 'y': int(new_y),
  740. 'width': int(new_w),
  741. 'height': int(new_h),
  742. 'center_x': float(new_x + new_w / 2),
  743. 'center_y': float(new_y + new_h / 2)
  744. }
  745. merged = True
  746. break
  747. # 如果不能合并,且不与已有格子重叠太多,添加为新格子
  748. if not merged:
  749. overlap_with_existing = False
  750. for existing in panels:
  751. overlap_x = max(0, min(candidate['x'] + candidate['width'],
  752. existing['x'] + existing['width']) -
  753. max(candidate['x'], existing['x']))
  754. overlap_y = max(0, min(candidate['y'] + candidate['height'],
  755. existing['y'] + existing['height']) -
  756. max(candidate['y'], existing['y']))
  757. overlap_area = overlap_x * overlap_y
  758. existing_area = existing['width'] * existing['height']
  759. min_area_check = min(candidate['area'], existing_area)
  760. # 如果重叠超过50%,跳过(可能是子区域)
  761. if overlap_area > min_area_check * 0.5:
  762. overlap_with_existing = True
  763. break
  764. if not overlap_with_existing:
  765. panels.append({
  766. 'x': candidate['x'],
  767. 'y': candidate['y'],
  768. 'width': candidate['width'],
  769. 'height': candidate['height'],
  770. 'center_x': candidate['center_x'],
  771. 'center_y': candidate['center_y']
  772. })
  773. # 第五步:绘制遮罩图(黑线白底:背景为白色255,格子线为黑色0)
  774. panel_mask = np.ones_like(gray) * 255 # 创建全白的mask
  775. for panel in panels:
  776. cv2.rectangle(panel_mask,
  777. (panel['x'], panel['y']),
  778. (panel['x'] + panel['width'],
  779. panel['y'] + panel['height']),
  780. 0, 4) # 绘制黑色的格子线
  781. return panel_mask, panels
  782. def merge_panel_mask_with_text_mask(panel_mask, text_mask):
  783. """
  784. 合并格子遮罩图和文字mask图
  785. 参数:
  786. panel_mask: 格子遮罩图(格子线为黑色0,其他为白色255)
  787. text_mask: 文字mask图
  788. 返回:
  789. combined_mask: 合并后的mask图
  790. """
  791. # 确保两个mask尺寸一致
  792. if panel_mask.shape != text_mask.shape:
  793. panel_mask = cv2.resize(panel_mask, (text_mask.shape[1], text_mask.shape[0]))
  794. # 合并:格子线(黑色0)和文字mask(非零部分)都保留
  795. # 格子遮罩图中,格子线是黑色(0),其他是白色(255)
  796. # 文字mask中,文字区域是非零值(通常是白色255)
  797. # 合并策略:将panel_mask反转(黑线变白线),然后与text_mask合并(取最大值)
  798. # 这样格子线(白色)和文字(白色)都会保留,背景为黑色
  799. panel_mask_inv = cv2.bitwise_not(panel_mask) # 反转:黑线(0)变白线(255),白底(255)变黑底(0)
  800. combined_mask = np.maximum(panel_mask_inv, text_mask) # 合并:保留格子线(白色)和文字(白色)
  801. return combined_mask
  802. if __name__ == '__main__':
  803. import argparse
  804. parser = argparse.ArgumentParser(description='检测漫画格子并生成遮罩图')
  805. parser.add_argument('image', help='输入图片路径')
  806. parser.add_argument('-o', '--output', help='输出目录')
  807. parser.add_argument('--text-mask', help='文字mask图片路径(可选,用于合并)')
  808. parser.add_argument('--text-blocks', help='文字块JSON文件路径(可选,用于辅助格子检测)')
  809. args = parser.parse_args()
  810. image_path = Path(args.image)
  811. if not image_path.exists():
  812. print(f"[ERROR] 图片文件不存在: {image_path}")
  813. sys.exit(1)
  814. # 读取图片(处理中文路径)
  815. # 在Windows上,cv2.imread可能无法直接读取包含中文的路径
  816. # 使用numpy先读取文件,然后解码
  817. try:
  818. import numpy as np
  819. with open(str(image_path), 'rb') as f:
  820. img_data = np.frombuffer(f.read(), np.uint8)
  821. img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
  822. except Exception as e:
  823. # 如果上述方法失败,尝试直接读取
  824. img = cv2.imread(str(image_path))
  825. if img is None:
  826. print(f"[ERROR] 无法读取图片文件: {image_path}")
  827. print(f"[DEBUG] 尝试使用绝对路径: {image_path.resolve()}")
  828. sys.exit(1)
  829. # 尝试读取文字块信息(如果提供,或自动查找)
  830. text_blocks = None
  831. # 如果提供了文字块文件路径,使用它
  832. if args.text_blocks:
  833. text_blocks_path = Path(args.text_blocks)
  834. else:
  835. # 否则,尝试自动查找文字块JSON文件
  836. # 查找可能的文件名:{image_name}_dialogues.json, {image_name}_text_blocks.json
  837. # 优先从tmp目录查找,然后从output_dir查找
  838. image_name = image_path.stem
  839. output_dir = Path(args.output) if args.output else image_path.parent
  840. tmp_dir = output_dir / 'tmp'
  841. possible_names = [
  842. tmp_dir / f"{image_name}_dialogues.json", # 优先从tmp目录查找
  843. output_dir / f"{image_name}_dialogues.json",
  844. output_dir / f"{image_name}_text_blocks.json",
  845. image_path.parent / f"{image_name}_dialogues.json",
  846. image_path.parent / f"{image_name}_text_blocks.json",
  847. ]
  848. text_blocks_path = None
  849. for possible_path in possible_names:
  850. if possible_path.exists():
  851. text_blocks_path = possible_path
  852. print(f"[INFO] 自动找到文字块文件: {text_blocks_path}")
  853. break
  854. # 读取文字块信息
  855. if text_blocks_path and text_blocks_path.exists():
  856. try:
  857. with open(text_blocks_path, 'r', encoding='utf-8') as f:
  858. text_blocks_data = json.load(f)
  859. # 尝试从不同格式的JSON中提取文字块信息
  860. if isinstance(text_blocks_data, list):
  861. text_blocks = text_blocks_data
  862. elif isinstance(text_blocks_data, dict):
  863. # 可能是包含 dialogues 或其他字段的格式
  864. if 'dialogues' in text_blocks_data:
  865. text_blocks = text_blocks_data['dialogues']
  866. elif 'text_blocks' in text_blocks_data:
  867. text_blocks = text_blocks_data['text_blocks']
  868. elif 'blocks' in text_blocks_data:
  869. text_blocks = text_blocks_data['blocks']
  870. # 转换文字块格式为统一格式
  871. if text_blocks:
  872. formatted_blocks = []
  873. for block in text_blocks:
  874. if isinstance(block, dict):
  875. # 尝试提取 bbox 或 xyxy
  876. if 'bbox' in block:
  877. bbox = block['bbox']
  878. formatted_blocks.append({
  879. 'xyxy': [bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2']]
  880. })
  881. elif 'xyxy' in block:
  882. formatted_blocks.append({'xyxy': block['xyxy']})
  883. text_blocks = formatted_blocks if formatted_blocks else text_blocks
  884. print(f"[INFO] 从JSON文件读取到 {len(text_blocks)} 个文字块")
  885. except Exception as e:
  886. print(f"[WARN] 无法读取文字块JSON文件: {e}")
  887. text_blocks = None
  888. # 尝试读取文字遮罩图(如果提供,或自动查找)
  889. text_mask = None
  890. # 如果提供了文字遮罩图路径,使用它
  891. if args.text_mask:
  892. text_mask_path = Path(args.text_mask)
  893. if text_mask_path.exists():
  894. try:
  895. text_mask = cv2.imread(str(text_mask_path), cv2.IMREAD_GRAYSCALE)
  896. if text_mask is not None:
  897. print(f"[INFO] 从文件读取文字遮罩图: {text_mask_path}")
  898. except Exception as e:
  899. print(f"[WARN] 无法读取文字遮罩图: {e}")
  900. else:
  901. # 否则,尝试自动查找文字遮罩图
  902. # 优先从tmp目录查找,然后从output_dir查找
  903. image_name = image_path.stem
  904. output_dir = Path(args.output) if args.output else image_path.parent
  905. tmp_dir = output_dir / 'tmp'
  906. possible_names = [
  907. tmp_dir / f"{image_name}_text_mask.png", # 优先从tmp目录查找
  908. output_dir / f"{image_name}_text_mask.png",
  909. image_path.parent / f"{image_name}_text_mask.png",
  910. ]
  911. for possible_path in possible_names:
  912. if possible_path.exists():
  913. try:
  914. text_mask = cv2.imread(str(possible_path), cv2.IMREAD_GRAYSCALE)
  915. if text_mask is not None:
  916. print(f"[INFO] 自动找到文字遮罩图: {possible_path}")
  917. break
  918. except Exception as e:
  919. continue
  920. # 检测格子
  921. print(f"[INFO] 正在检测漫画格子: {image_path.name}")
  922. panel_mask, panels = detect_comic_panels(img, text_blocks=text_blocks, text_mask=text_mask)
  923. print(f"[OK] 检测到 {len(panels)} 个格子")
  924. # 设置输出目录
  925. if args.output:
  926. output_dir = Path(args.output)
  927. output_dir.mkdir(parents=True, exist_ok=True)
  928. else:
  929. output_dir = image_path.parent
  930. # 如果output_dir已经是tmp目录,直接使用它;否则创建tmp子目录
  931. # 检查路径的最后一部分是否是'tmp'(支持相对路径和绝对路径)
  932. output_dir_str = str(output_dir)
  933. if output_dir_str.endswith('tmp') or output_dir_str.endswith('tmp\\') or output_dir_str.endswith('tmp/'):
  934. tmp_dir = output_dir
  935. else:
  936. tmp_dir = output_dir / 'tmp'
  937. tmp_dir.mkdir(parents=True, exist_ok=True)
  938. image_name = image_path.stem
  939. # 保存格子遮罩图到tmp目录(中间文件)
  940. panel_mask_path = tmp_dir / f"{image_name}_panel_mask.png"
  941. # 使用cv2.imencode处理中文路径
  942. success, encoded_img = cv2.imencode('.png', panel_mask)
  943. if success:
  944. with open(str(panel_mask_path), 'wb') as f:
  945. f.write(encoded_img.tobytes())
  946. print(f"[OK] 已保存格子遮罩图: {panel_mask_path}")
  947. else:
  948. print(f"[ERROR] 保存格子遮罩图失败: {panel_mask_path}")
  949. # 如果提供了文字mask,进行合并
  950. if args.text_mask:
  951. text_mask_path = Path(args.text_mask)
  952. if text_mask_path.exists():
  953. # 使用np.fromfile处理中文路径
  954. text_mask_array = np.fromfile(str(text_mask_path), dtype=np.uint8)
  955. text_mask = cv2.imdecode(text_mask_array, cv2.IMREAD_GRAYSCALE)
  956. if text_mask is not None:
  957. combined_mask = merge_panel_mask_with_text_mask(panel_mask, text_mask)
  958. combined_mask_path = tmp_dir / f"{image_name}_combined_mask.png"
  959. # 使用cv2.imencode处理中文路径
  960. success, encoded_img = cv2.imencode('.png', combined_mask)
  961. if success:
  962. with open(str(combined_mask_path), 'wb') as f:
  963. f.write(encoded_img.tobytes())
  964. print(f"[OK] 已保存合并后的mask图: {combined_mask_path}")
  965. else:
  966. print(f"[ERROR] 保存合并后的mask图失败: {combined_mask_path}")
  967. else:
  968. print(f"[WARN] 无法读取文字mask图: {text_mask_path}")
  969. else:
  970. print(f"[WARN] 文字mask图不存在: {text_mask_path}")
  971. # 保存格子信息JSON到tmp目录(中间文件)
  972. panels_json = {
  973. 'image_file': image_path.name,
  974. 'panels': panels,
  975. 'total_count': len(panels)
  976. }
  977. json_path = tmp_dir / f"{image_name}_panels.json"
  978. with open(json_path, 'w', encoding='utf-8') as f:
  979. json.dump(panels_json, f, ensure_ascii=False, indent=2)
  980. print(f"[OK] 已保存格子信息: {json_path}")