# -*- coding: utf-8 -*-
"""
Detect green outline boxes in an image and crop the image along those boxes.

Intended for images in which text regions are marked with green outline boxes.
"""
- import sys
- import json
- import cv2
- import numpy as np
- from pathlib import Path
# Windows encoding fix: re-wrap stdout/stderr as UTF-8 text streams so the
# non-ASCII log messages printed by this script do not fail to encode
# (unencodable characters are replaced rather than raising).
if sys.platform == 'win32':
    import io

    for _stream_name in ('stdout', 'stderr'):
        # A stream may be None or a replacement object without a byte-level
        # ``buffer``; leave such streams untouched instead of failing with
        # AttributeError at import time.
        _raw_buffer = getattr(getattr(sys, _stream_name), 'buffer', None)
        if _raw_buffer is not None:
            setattr(
                sys,
                _stream_name,
                io.TextIOWrapper(_raw_buffer, encoding='utf-8', errors='replace'),
            )
    del _stream_name, _raw_buffer
def detect_green_boxes(image_path, green_color_lower=(0, 200, 0), green_color_upper=(50, 255, 50)):
    """
    Detect green rectangular outlines in an image (OpenCV only, no external data).

    Args:
        image_path: Path of the image that contains the green outline boxes.
        green_color_lower: Lower bound of the green range, in BGR order.
        green_color_upper: Upper bound of the green range, in BGR order.

    Returns:
        A list of dicts, one per detected box, with the keys ``x1``, ``y1``,
        ``x2``, ``y2``, ``width``, ``height``, ``center_x``, ``center_y`` and
        ``area``. The list is ordered right-to-left by column bucket and
        top-to-bottom inside each bucket.

    Raises:
        ValueError: If the file is empty or cannot be decoded as an image.
    """
    # Read the raw bytes with NumPy and decode them in memory so that paths
    # containing Chinese characters are handled.
    img_array = np.fromfile(str(image_path), dtype=np.uint8)

    # An empty file produces an empty buffer, which cv2.imdecode rejects with
    # its own error instead of returning None; report it as the same
    # ValueError used for every other unreadable image.
    if img_array.size == 0:
        raise ValueError(f"无法读取图片: {image_path}")

    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError(f"无法读取图片: {image_path}")

    print(f"[INFO] 读取图片: {Path(image_path).name}")
    print(f"[INFO] 图片尺寸: {img.shape[1]}x{img.shape[0]}")

    h, w = img.shape[:2]

    # Step 1: binary mask of the pixels that fall inside the green range.
    green_mask = cv2.inRange(img, np.array(green_color_lower), np.array(green_color_upper))

    # Step 2: morphological closing followed by dilation to join broken
    # outline segments into connected shapes.
    kernel = np.ones((5, 5), np.uint8)
    green_mask = cv2.morphologyEx(green_mask, cv2.MORPH_CLOSE, kernel, iterations=3)
    green_mask = cv2.dilate(green_mask, kernel, iterations=2)

    # Step 3: outer contours of the green shapes.
    contours, _ = cv2.findContours(green_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    print(f"[INFO] 检测到 {len(contours)} 个绿色轮廓")

    # Step 4: turn contours into bounding boxes and drop noise.
    # A box must cover at least 0.5% of the image area.
    min_area = w * h * 0.005
    green_boxes = []
    for contour in contours:
        x, y, box_w, box_h = cv2.boundingRect(contour)
        bbox_area = box_w * box_h

        # Too small: most likely noise.
        if bbox_area < min_area:
            continue

        # Narrower or shorter than 10 px: a stray line rather than a box.
        if box_w < 10 or box_h < 10:
            continue

        # Contour area much smaller than its bounding box: irregular shape
        # or broken line. bbox_area is at least 100 here, so the division is
        # safe.
        if cv2.contourArea(contour) / bbox_area < 0.1:
            continue

        green_boxes.append({
            'x1': int(x),
            'y1': int(y),
            'x2': int(x + box_w),
            'y2': int(y + box_h),
            'width': int(box_w),
            'height': int(box_h),
            'center_x': float(x + box_w / 2),
            'center_y': float(y + box_h / 2),
            'area': int(bbox_area)
        })

    print(f"[INFO] 过滤后剩余 {len(green_boxes)} 个绿色框")

    # Step 5: order the boxes.
    #   1. Rightmost column first (larger x comes earlier).
    #   2. Inside one column, top first (smaller y comes earlier).
    # Columns are formed by quantizing center_x with a bucket width of 20% of
    # the image width (at least 80 px).
    x_threshold = max(w * 0.2, 80)

    def x_bucket(box):
        """Return the column bucket index of ``box`` (floor of center_x / bucket width)."""
        # NOTE: buckets are fixed intervals, so two boxes that sit close
        # together but on opposite sides of a bucket boundary are placed in
        # different columns.
        return int(box['center_x'] // x_threshold)

    # Negating the bucket puts right-hand columns first; center_y orders the
    # boxes of one column from top to bottom.
    green_boxes.sort(key=lambda box: (-x_bucket(box), box['center_y']))

    # Print the final order for debugging.
    print(f"[DEBUG] 排序后的绿色框顺序 (x_threshold={x_threshold:.1f}):")
    for i, box in enumerate(green_boxes, 1):
        x_quantized = x_bucket(box)
        print(f" [{i}] center_x={box['center_x']:.1f}, center_y={box['center_y']:.1f}, x_quantized={x_quantized}, 位置=({box['x1']},{box['y1']}) -> ({box['x2']},{box['y2']})")

    return green_boxes
def cut_dialog_blocks_by_green_box(image_path, output_dir, padding=5, line_thickness=3,
                                   green_color_lower=(0, 200, 0),
                                   green_color_upper=(50, 255, 50)):
    """
    Detect the green outline boxes of an image and save one crop per box.

    Args:
        image_path: Path of the image that contains the green outline boxes.
        output_dir: Directory that receives the ``dialog_<n>.png`` files; it is
            created when at least one box was detected.
        padding: Margin in pixels added back around each crop.
        line_thickness: Estimated thickness in pixels of the green outline;
            every box is shrunk by this amount before ``padding`` is applied.
        green_color_lower: Lower bound of the green range, in BGR order.
        green_color_upper: Upper bound of the green range, in BGR order.
            The same range is used for detection and for whitening leftover
            green pixels, so the two steps always agree.

    Returns:
        A list of dicts describing the saved files (``group_index``,
        ``file_path``, ``file_name``, ``bbox``, ``blocks_count``). The list is
        empty when no box was detected.

    Raises:
        ValueError: If the image cannot be decoded.
    """
    # Locate the boxes first; they come back already sorted.
    green_boxes = detect_green_boxes(image_path, green_color_lower, green_color_upper)

    if not green_boxes:
        print("[WARN] 未检测到任何绿色框")
        return []

    # Load the original image again; the crops are taken from it.
    img_array = np.fromfile(str(image_path), dtype=np.uint8)
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

    if img is None:
        raise ValueError(f"无法读取图片: {image_path}")

    img_height, img_width = img.shape[:2]

    # Make sure the output directory exists.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Mask of green pixels, used to blank out outline remnants in the crops.
    green_mask = cv2.inRange(img, np.array(green_color_lower), np.array(green_color_upper))

    saved_files = []
    total = len(green_boxes)
    print(f"[DEBUG] 开始保存文件,共 {total} 个框:")
    for index, box in enumerate(green_boxes, 1):
        print(f"[DEBUG] 处理框 {index}: center_x={box['center_x']:.1f}, center_y={box['center_y']:.1f}, 位置=({box['x1']},{box['y1']}) -> ({box['x2']},{box['y2']})")

        # Step 1: shrink the box by the outline thickness so the region
        # starts inside the green frame.
        x1 = max(0, box['x1'] + line_thickness)
        y1 = max(0, box['y1'] + line_thickness)
        x2 = min(img_width, box['x2'] - line_thickness)
        y2 = min(img_height, box['y2'] - line_thickness)

        # Step 2: add the requested margin back, clamped to the image bounds.
        x1 = max(0, x1 - padding)
        y1 = max(0, y1 - padding)
        x2 = min(img_width, x2 + padding)
        y2 = min(img_height, y2 + padding)

        if x2 <= x1 or y2 <= y1:
            print(f" [WARN] 绿色框 {index} 裁剪区域无效,跳过")
            continue

        # Step 3: copy the region out of the original image.
        crop = img[y1:y2, x1:x2].copy()

        # Step 4: paint any green pixel left inside the crop white (BGR) so
        # the saved image shows no part of the outline.
        crop_green_mask = green_mask[y1:y2, x1:x2]
        crop[crop_green_mask > 0] = [255, 255, 255]

        # File names follow the dialog_<n>.png pattern expected by the
        # Node.js consumer; <n> is the 1-based position of the box in the
        # sorted list, so skipped boxes leave gaps in the numbering.
        block_filename = f"dialog_{index}.png"
        block_path = output_dir / block_filename

        # Encode in memory and write with tofile, mirroring the
        # fromfile/imdecode pair used for reading.
        success, encoded_img = cv2.imencode('.png', crop)
        if success:
            encoded_img.tofile(str(block_path))
            saved_files.append({
                'group_index': index,
                'file_path': str(block_path),
                'file_name': block_filename,
                'bbox': {
                    'x1': x1,
                    'y1': y1,
                    'x2': x2,
                    'y2': y2
                },
                'blocks_count': 1
            })
            # Include the box center in the log line to ease debugging.
            print(f" [{index}/{total}] ✅ 已保存: {block_filename} (区域: {x1},{y1} -> {x2},{y2}, center_x={box['center_x']:.1f}, center_y={box['center_y']:.1f})")
        else:
            print(f" [ERROR] 绿色框 {index} 保存失败")

    print(f"\n✅ 成功裁剪并保存 {len(saved_files)} 个文字块")
    return saved_files
if __name__ == '__main__':
    # Command-line entry point: <image_path> <output_dir> [padding]
    if len(sys.argv) < 3:
        print("用法: python cut_dialog_blocks_by_green_box.py <image_path> <output_dir> [padding]")
        print("示例: python cut_dialog_blocks_by_green_box.py image.png output_dir 5")
        sys.exit(1)

    image_path = sys.argv[1]
    output_dir = sys.argv[2]
    padding = int(sys.argv[3]) if len(sys.argv) > 3 else 5

    saved_files = cut_dialog_blocks_by_green_box(image_path, output_dir, padding)

    # Summary written next to the crops as JSON.
    result = {
        'image_path': image_path,
        'output_dir': output_dir,
        'total_blocks': len(saved_files),
        'saved_files': saved_files
    }

    # When no box is detected the cutting function returns before creating
    # the output directory, so create it here; otherwise writing the result
    # file fails with FileNotFoundError.
    result_dir = Path(output_dir)
    result_dir.mkdir(parents=True, exist_ok=True)

    result_json_path = result_dir / 'cut_blocks_result.json'
    with open(result_json_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    print(f"\n✅ 结果已保存到: {result_json_path}")
|