cut_dialog_blocks_by_green_box.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. # -*- coding: utf-8 -*-
  2. """
  3. 检测图片中的绿色线框,并沿着绿色线框切图
  4. 适用于带绿色线框标记的文字区域图片
  5. """
  6. import sys
  7. import json
  8. import cv2
  9. import numpy as np
  10. from pathlib import Path
  11. # Windows编码修复
  12. if sys.platform == 'win32':
  13. import io
  14. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
  15. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
  16. def detect_green_boxes(image_path, green_color_lower=(0, 200, 0), green_color_upper=(50, 255, 50)):
  17. """
  18. 检测图片中的绿色矩形框(使用OpenCV,不使用任何外部数据)
  19. 参数:
  20. image_path: 图片路径(包含绿色线框的图片)
  21. green_color_lower: 绿色范围下限(BGR格式)
  22. green_color_upper: 绿色范围上限(BGR格式)
  23. 返回:
  24. 检测到的绿色框列表,每个包含 bbox 信息
  25. """
  26. # 读取图片(处理中文路径)
  27. img_array = np.fromfile(str(image_path), dtype=np.uint8)
  28. img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
  29. if img is None:
  30. raise ValueError(f"无法读取图片: {image_path}")
  31. print(f"[INFO] 读取图片: {Path(image_path).name}")
  32. print(f"[INFO] 图片尺寸: {img.shape[1]}x{img.shape[0]}")
  33. h, w = img.shape[:2]
  34. # 步骤1: 创建绿色掩码(检测绿色像素)
  35. green_mask = cv2.inRange(img, np.array(green_color_lower), np.array(green_color_upper))
  36. # 步骤2: 形态学操作,连接断开的绿色线条
  37. # 使用更大的kernel来更好地连接线条
  38. kernel = np.ones((5, 5), np.uint8)
  39. green_mask = cv2.morphologyEx(green_mask, cv2.MORPH_CLOSE, kernel, iterations=3)
  40. green_mask = cv2.dilate(green_mask, kernel, iterations=2)
  41. # 步骤3: 使用轮廓检测找到绿色框
  42. contours, _ = cv2.findContours(green_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  43. print(f"[INFO] 检测到 {len(contours)} 个绿色轮廓")
  44. # 步骤4: 提取矩形框并过滤
  45. green_boxes = []
  46. for i, contour in enumerate(contours):
  47. # 计算边界框
  48. x, y, box_w, box_h = cv2.boundingRect(contour)
  49. # 过滤太小的区域(可能是噪点)
  50. min_area = w * h * 0.005 # 至少占图片面积的0.5%(提高阈值,过滤小噪点)
  51. if box_w * box_h < min_area:
  52. continue
  53. # 过滤太细的线(宽度或高度小于10像素的可能是线条而不是框)
  54. if box_w < 10 or box_h < 10:
  55. continue
  56. # 计算轮廓面积与边界框面积的比值,过滤不规则的形状
  57. contour_area = cv2.contourArea(contour)
  58. bbox_area = box_w * box_h
  59. if bbox_area > 0:
  60. area_ratio = contour_area / bbox_area
  61. # 如果比值太小,可能是噪点或断开的线条
  62. if area_ratio < 0.1:
  63. continue
  64. green_boxes.append({
  65. 'x1': int(x),
  66. 'y1': int(y),
  67. 'x2': int(x + box_w),
  68. 'y2': int(y + box_h),
  69. 'width': int(box_w),
  70. 'height': int(box_h),
  71. 'center_x': float(x + box_w / 2),
  72. 'center_y': float(y + box_h / 2),
  73. 'area': int(box_w * box_h)
  74. })
  75. print(f"[INFO] 过滤后剩余 {len(green_boxes)} 个绿色框")
  76. # 步骤5: 按位置排序
  77. # 排序规则:
  78. # 1. 最优先:按x坐标排序(右边优先,即x越大越靠前)
  79. # 2. 如果x坐标相近(差值小于阈值),再按y坐标排序(越上越前,即y越小越靠前)
  80. # 阈值:如果两个框的x坐标差值小于图片宽度的20%,则认为它们x坐标相近
  81. x_threshold = max(w * 0.2, 80) # x坐标相近的阈值(图片宽度的20%,最小80像素)
  82. def sort_key(box):
  83. """
  84. 排序键函数:
  85. 1. 主要按x坐标(右边优先):将x坐标量化,使得x坐标相近的框会被归到同一组
  86. 2. 次要按y坐标(上面优先):在同一组内按y坐标排序
  87. 量化方法:将x坐标除以阈值并向下取整,这样x坐标相近的框会得到相同的量化值
  88. 然后使用 (-量化值, y坐标) 作为排序键,确保:
  89. - x坐标越大(越右)的框排在前面
  90. - x坐标相近的框按y坐标排序(越上越前)
  91. """
  92. # 将x坐标量化:除以阈值并向下取整
  93. # 使用向下取整确保x坐标相近的框会被归到同一组
  94. x_quantized = int(box['center_x'] // x_threshold)
  95. # 返回排序键:(-x_quantized, center_y)
  96. # -x_quantized 确保右边(x更大)的框排在前面
  97. # center_y 确保x坐标相近的框按y坐标排序(越上越前)
  98. return (-x_quantized, box['center_y'])
  99. # 使用排序键排序
  100. green_boxes.sort(key=sort_key)
  101. # 打印排序后的框信息(用于调试)
  102. print(f"[DEBUG] 排序后的绿色框顺序 (x_threshold={x_threshold:.1f}):")
  103. for i, box in enumerate(green_boxes, 1):
  104. x_quantized = int(box['center_x'] // x_threshold)
  105. print(f" [{i}] center_x={box['center_x']:.1f}, center_y={box['center_y']:.1f}, x_quantized={x_quantized}, 位置=({box['x1']},{box['y1']}) -> ({box['x2']},{box['y2']})")
  106. return green_boxes
  107. def cut_dialog_blocks_by_green_box(image_path, output_dir, padding=5):
  108. """
  109. 检测绿色线框并切图
  110. 参数:
  111. image_path: 包含绿色线框的图片路径
  112. output_dir: 输出目录
  113. padding: 裁剪时添加的边距(像素)
  114. 返回:
  115. 保存的文件信息列表
  116. """
  117. # 检测绿色框
  118. green_boxes = detect_green_boxes(image_path)
  119. if not green_boxes:
  120. print("[WARN] 未检测到任何绿色框")
  121. return []
  122. # 读取原图(用于裁剪)
  123. img_array = np.fromfile(str(image_path), dtype=np.uint8)
  124. img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
  125. if img is None:
  126. raise ValueError(f"无法读取图片: {image_path}")
  127. img_height, img_width = img.shape[:2]
  128. # 确保输出目录存在
  129. output_dir = Path(output_dir)
  130. output_dir.mkdir(parents=True, exist_ok=True)
  131. # 创建绿色掩码(用于排除绿色像素)
  132. green_mask = cv2.inRange(img, np.array((0, 200, 0)), np.array((50, 255, 50)))
  133. # 裁剪并保存每个绿色框内的内容
  134. saved_files = []
  135. print(f"[DEBUG] 开始保存文件,共 {len(green_boxes)} 个框:")
  136. for i, box in enumerate(green_boxes):
  137. print(f"[DEBUG] 处理框 {i+1}: center_x={box['center_x']:.1f}, center_y={box['center_y']:.1f}, 位置=({box['x1']},{box['y1']}) -> ({box['x2']},{box['y2']})")
  138. # 步骤1: 向内收缩边界,排除绿色线框本身(线框通常2-3像素宽)
  139. # 这样裁剪的区域只包含框内的内容,不包含绿色线框
  140. line_thickness = 3 # 绿色线框的估计厚度
  141. x1 = max(0, box['x1'] + line_thickness)
  142. y1 = max(0, box['y1'] + line_thickness)
  143. x2 = min(img_width, box['x2'] - line_thickness)
  144. y2 = min(img_height, box['y2'] - line_thickness)
  145. # 步骤2: 添加边距(向内收缩后再向外扩展,但不超过原图边界)
  146. x1 = max(0, x1 - padding)
  147. y1 = max(0, y1 - padding)
  148. x2 = min(img_width, x2 + padding)
  149. y2 = min(img_height, y2 + padding)
  150. if x2 <= x1 or y2 <= y1:
  151. print(f" [WARN] 绿色框 {i + 1} 裁剪区域无效,跳过")
  152. continue
  153. # 步骤3: 裁剪原图内容(只包含绿色框内的区域)
  154. crop = img[y1:y2, x1:x2].copy()
  155. # 步骤4: 获取对应区域的绿色掩码,将残留的绿色像素替换为白色背景
  156. # 这样可以确保输出图片不包含任何绿色线框
  157. crop_green_mask = green_mask[y1:y2, x1:x2]
  158. crop[crop_green_mask > 0] = [255, 255, 255] # BGR格式的白色
  159. # 保存(使用dialog_{i}.png格式,与Node.js期望的格式一致)
  160. block_filename = f"dialog_{i + 1}.png"
  161. block_path = output_dir / block_filename
  162. success, encoded_img = cv2.imencode('.png', crop)
  163. if success:
  164. encoded_img.tofile(str(block_path))
  165. saved_files.append({
  166. 'group_index': i + 1,
  167. 'file_path': str(block_path),
  168. 'file_name': block_filename,
  169. 'bbox': {
  170. 'x1': x1,
  171. 'y1': y1,
  172. 'x2': x2,
  173. 'y2': y2
  174. },
  175. 'blocks_count': 1
  176. })
  177. # 添加更详细的调试信息,显示框的中心坐标
  178. print(f" [{i + 1}/{len(green_boxes)}] ✅ 已保存: {block_filename} (区域: {x1},{y1} -> {x2},{y2}, center_x={box['center_x']:.1f}, center_y={box['center_y']:.1f})")
  179. else:
  180. print(f" [ERROR] 绿色框 {i + 1} 保存失败")
  181. print(f"\n✅ 成功裁剪并保存 {len(saved_files)} 个文字块")
  182. return saved_files
  183. if __name__ == '__main__':
  184. if len(sys.argv) < 3:
  185. print("用法: python cut_dialog_blocks_by_green_box.py <image_path> <output_dir> [padding]")
  186. print("示例: python cut_dialog_blocks_by_green_box.py image.png output_dir 5")
  187. sys.exit(1)
  188. image_path = sys.argv[1]
  189. output_dir = sys.argv[2]
  190. padding = int(sys.argv[3]) if len(sys.argv) > 3 else 5
  191. saved_files = cut_dialog_blocks_by_green_box(image_path, output_dir, padding)
  192. # 输出JSON结果
  193. result = {
  194. 'image_path': image_path,
  195. 'output_dir': output_dir,
  196. 'total_blocks': len(saved_files),
  197. 'saved_files': saved_files
  198. }
  199. result_json_path = Path(output_dir) / 'cut_blocks_result.json'
  200. with open(result_json_path, 'w', encoding='utf-8') as f:
  201. json.dump(result, f, ensure_ascii=False, indent=2)
  202. print(f"\n✅ 结果已保存到: {result_json_path}")