| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126 |
- # -*- coding: utf-8 -*-
- """
- 在格子内检测并切割文字区域
- """
- import sys
- import cv2
- import numpy as np
- from pathlib import Path
# Windows encoding fix: re-wrap the standard streams as UTF-8 so that
# non-ASCII output does not fail on consoles using a legacy code page;
# characters that cannot be encoded are replaced instead of raising.
if sys.platform == 'win32':
    import io

    def _as_utf8(stream):
        """Return a UTF-8 text wrapper around the binary buffer of *stream*."""
        return io.TextIOWrapper(stream.buffer, encoding='utf-8', errors='replace')

    sys.stdout = _as_utf8(sys.stdout)
    sys.stderr = _as_utf8(sys.stderr)
def _find_text_boxes(img, min_area_ratio, max_area_ratio, min_side):
    """Return bounding boxes ``(x, y, w, h)`` of candidate text regions.

    The image is binarized with an inverted adaptive threshold, nearby
    strokes are merged with a morphological close plus a dilation, and the
    external contours of the result are filtered by area and size.

    The boxes are sorted by descending horizontal center (rightmost first);
    boxes with the same horizontal center are ordered by ascending vertical
    center (topmost first).
    """
    img_height, img_width = img.shape[:2]

    # Grayscale conversion (a 2-D input is assumed to be grayscale already).
    if img.ndim == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()

    # Inverted adaptive threshold: dark strokes become white foreground.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )

    # Morphology to connect characters that are close to each other.
    kernel = np.ones((3, 3), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)
    binary = cv2.dilate(binary, kernel, iterations=1)

    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); the contours are second-to-last in both.
    contours = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    image_area = img_width * img_height
    min_area = image_area * min_area_ratio
    max_area = image_area * max_area_ratio

    boxes = []
    for contour in contours:
        if not min_area < cv2.contourArea(contour) < max_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        # Drop regions whose bounding box is too small in either dimension.
        if w > min_side and h > min_side:
            boxes.append((x, y, w, h))

    # Rightmost center first, then topmost center first.
    boxes.sort(key=lambda b: (-(b[0] + b[2] / 2), b[1] + b[3] / 2))
    return boxes


def cut_text_regions_in_panel(panel_image_path, output_dir, *,
                              min_area_ratio=0.001, max_area_ratio=0.5,
                              min_side=10):
    """Detect text regions in a panel image and save each one as a PNG crop.

    Args:
        panel_image_path: Path of the panel image to process.
        output_dir: Directory that receives the crops; it is created
            (including parents) if it does not exist.
        min_area_ratio: A contour is kept only if its area is strictly
            greater than this fraction of the image area (default 0.1%).
        max_area_ratio: A contour is kept only if its area is strictly
            smaller than this fraction of the image area (default 50%).
        min_side: A region is kept only if its bounding box is strictly
            wider and taller than this many pixels.

    Returns:
        A list of the written file paths (as strings), named
        ``<panel stem>_text<N>.png`` with N starting at 1, in the order
        rightmost region first, then topmost first.

    Raises:
        ValueError: If the file is empty or cannot be decoded as an image.
        Errors raised by ``np.fromfile`` (for example for a missing path)
        propagate unchanged.
    """
    panel_image_path = Path(panel_image_path)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Read via np.fromfile + cv2.imdecode so that non-ASCII paths work.
    img_array = np.fromfile(str(panel_image_path), dtype=np.uint8)

    # cv2.imdecode raises cv2.error on an empty buffer instead of returning
    # None, so a zero-byte file is routed to the same ValueError below.
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_array.size else None

    if img is None:
        raise ValueError(f"无法读取图片: {panel_image_path}")

    img_height, img_width = img.shape[:2]
    boxes = _find_text_boxes(img, min_area_ratio, max_area_ratio, min_side)

    panel_name = panel_image_path.stem
    cut_files = []

    for idx, (box_x, box_y, box_w, box_h) in enumerate(boxes, 1):
        # Clamp the box to the image bounds before slicing.
        x = max(0, box_x)
        y = max(0, box_y)
        w = min(box_w, img_width - x)
        h = min(box_h, img_height - y)

        if w <= 0 or h <= 0:
            continue

        text_roi = img[y:y + h, x:x + w]
        output_path = output_dir / f"{panel_name}_text{idx}.png"

        # Encode in memory and write with tofile so that non-ASCII output
        # paths work; a crop that fails to encode is skipped.
        success, encoded_img = cv2.imencode('.png', text_roi)
        if success:
            encoded_img.tofile(str(output_path))
            cut_files.append(str(output_path))

    return cut_files
if __name__ == '__main__':
    # Command-line entry point: expects the panel image path followed by
    # the output directory; prints a usage message and exits with status 1
    # when either is missing.
    if len(sys.argv) < 3:
        print("用法: python cut_text_regions_in_panel.py <格子图片路径> <输出目录>")
        sys.exit(1)

    panel_image_path, output_dir = sys.argv[1:3]

    try:
        cut_files = cut_text_regions_in_panel(panel_image_path, output_dir)
        print(f"✅ 成功切割 {len(cut_files)} 个文字区域")
    except Exception as e:
        # Report the failure, dump the traceback, and exit with status 1.
        import traceback
        print(f"[ERROR] 切割失败: {e}")
        traceback.print_exc()
        sys.exit(1)
|