# yichael/AIStoryBoard
# -*- coding: utf-8 -*-
"""
在图片上绘制文字块区域的绿色线框
根据OCR JSON文件中的bbox信息，在图片上标记文字区域
"""

import sys
import json
import cv2
import numpy as np
from pathlib import Path

# Windows encoding fix: replace sys.stdout and sys.stderr with UTF-8 text
# wrappers around their underlying binary buffers; characters that cannot be
# encoded are replaced instead of raising.
if sys.platform == 'win32':
    import io
    for _stream_name in ('stdout', 'stderr'):
        _binary = getattr(sys, _stream_name).buffer
        setattr(sys, _stream_name,
                io.TextIOWrapper(_binary, encoding='utf-8', errors='replace'))


def _parse_bbox(bbox):
    """Normalize one bbox value to integer corner coordinates.

    Supported shapes (checked in this order):
        1. {"x1": ..., "y1": ..., "x2": ..., "y2": ...}
        2. {"x": ..., "y": ..., "width": ..., "height": ...}
        3. {"xyxy": [x1, y1, x2, y2]}
        4. [x1, y1, x2, y2]

    Returns:
        An (x1, y1, x2, y2) tuple of ints, or None when the shape is not
        recognized or a coordinate cannot be converted to int.
    """
    try:
        if isinstance(bbox, dict):
            if all(key in bbox for key in ('x1', 'y1', 'x2', 'y2')):
                coords = (bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2'])
            elif all(key in bbox for key in ('x', 'y', 'width', 'height')):
                coords = (
                    bbox['x'],
                    bbox['y'],
                    bbox['x'] + bbox['width'],
                    bbox['y'] + bbox['height'],
                )
            elif isinstance(bbox.get('xyxy'), list) and len(bbox['xyxy']) >= 4:
                coords = bbox['xyxy'][:4]
            else:
                return None
        elif isinstance(bbox, list) and len(bbox) >= 4:
            coords = bbox[:4]
        else:
            return None
        return tuple(int(value) for value in coords)
    except (TypeError, ValueError, OverflowError):
        # A null, non-numeric or infinite coordinate invalidates only this
        # bbox; the caller skips it instead of aborting the whole image.
        return None


def _draw_label(img, label, x1, y1, color):
    """Draw `label` on a white background at the top-left corner (x1, y1) of a box."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    font_thickness = 1
    (text_width, text_height), baseline = cv2.getTextSize(
        label, font, font_scale, font_thickness)
    label_height = text_height + baseline + 2

    # Default position is directly above the box. When the box touches the
    # top of the image there is no room above, so the label is moved just
    # inside the box instead of being drawn at negative y (and clipped away).
    label_bottom = y1 if y1 >= label_height else y1 + label_height
    # Shift left if the label would otherwise run past the right image edge.
    label_left = max(0, min(x1, img.shape[1] - text_width))

    # White filled background, then the text in the outline color.
    cv2.rectangle(img, (label_left, label_bottom - label_height),
                  (label_left + text_width, label_bottom), (255, 255, 255), -1)
    cv2.putText(img, label, (label_left, label_bottom - baseline - 2),
                font, font_scale, color, font_thickness, cv2.LINE_AA)


def draw_text_blocks(image_path, json_path, output_path, line_color=(0, 255, 0), line_thickness=2):
    """
    Draw an outline rectangle and a 1-based index label for every OCR text block.

    Args:
        image_path: Path of the input image.
        json_path: Path of the OCR JSON file. Accepted layouts are
            {"dialogues": [...]}, {"text_blocks": [...]}, or a root-level
            list; each entry is expected to carry a "bbox" value.
        output_path: Destination path. The image is always PNG-encoded,
            whatever extension this path has.
        line_color: Outline and label color in BGR order (default green).
        line_thickness: Outline thickness in pixels (default 2).

    Returns:
        True if the annotated image was written; False on any failure
        (unreadable image, unrecognized JSON layout, encoding failure, or
        any exception, which is printed together with its traceback).
        Entries without a usable bbox are skipped and do not cause a failure.
    """
    try:
        # Read the raw bytes and decode them (instead of passing the path to
        # OpenCV) so that paths containing Chinese characters work.
        img_array = np.fromfile(str(image_path), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

        if img is None:
            print(f"[ERROR] 无法读取图片: {image_path}")
            return False

        img_height, img_width = img.shape[:2]
        print(f"[INFO] 读取图片: {Path(image_path).name}")
        print(f"[INFO] 图片尺寸: {img_width}x{img_height}")

        with open(json_path, 'r', encoding='utf-8') as f:
            json_data = json.load(f)

        print(f"[INFO] 读取JSON文件: {Path(json_path).name}")

        # Locate the list of blocks. The isinstance guard keeps a non-dict
        # root (list, number, null) out of the key lookups.
        root_is_dict = isinstance(json_data, dict)
        if root_is_dict and 'dialogues' in json_data:
            text_blocks = json_data['dialogues']
            print(f"[INFO] 检测到 {len(text_blocks)} 个对话区域")
        elif root_is_dict and 'text_blocks' in json_data:
            text_blocks = json_data['text_blocks']
            print(f"[INFO] 检测到 {len(text_blocks)} 个文字块区域")
        else:
            print("[WARN] JSON文件中未找到 'dialogues' 或 'text_blocks' 字段")
            # Fall back to a root-level array of blocks.
            if not isinstance(json_data, list):
                print("[ERROR] 无法解析JSON文件格式")
                return False
            text_blocks = json_data
            print(f"[INFO] 使用根级别数组，检测到 {len(text_blocks)} 个区域")

        drawn_count = 0
        for i, block in enumerate(text_blocks):
            if not isinstance(block, dict):
                # A stray non-object entry must not abort the other blocks.
                print(f"  [WARN] 文字块 {i + 1} 的bbox格式不支持，跳过")
                continue

            bbox = block.get('bbox')
            if not bbox:
                continue

            coords = _parse_bbox(bbox)
            if coords is None:
                print(f"  [WARN] 文字块 {i + 1} 的bbox格式不支持，跳过")
                continue

            # Clamp every corner into the image.
            x1, y1, x2, y2 = coords
            x1 = max(0, min(x1, img_width - 1))
            y1 = max(0, min(y1, img_height - 1))
            x2 = max(0, min(x2, img_width - 1))
            y2 = max(0, min(y2, img_height - 1))

            # After clamping the box must still have positive width and height.
            if x1 >= x2 or y1 >= y2:
                print(f"  [WARN] 文字块 {i + 1} 的bbox无效: ({x1}, {y1}) -> ({x2}, {y2})，跳过")
                continue

            cv2.rectangle(img, (x1, y1), (x2, y2), line_color, line_thickness)
            _draw_label(img, str(i + 1), x1, y1, line_color)
            drawn_count += 1

        print(f"[INFO] 成功绘制 {drawn_count} 个文字块区域")

        # Encode in memory and write the bytes ourselves, again so that
        # paths containing Chinese characters work.
        success, encoded_img = cv2.imencode('.png', img)
        if not success:
            print(f"[ERROR] 保存图片失败: {output_path}")
            return False

        encoded_img.tofile(str(output_path))
        print(f"[OK] 已保存标记后的图片: {output_path}")
        return True

    except Exception as e:
        # Top-level boundary: report the problem and signal failure to the caller.
        print(f"[ERROR] 处理失败: {e}")
        import traceback
        traceback.print_exc()
        return False


if __name__ == '__main__':
    # Command-line entry point:
    #   python draw_text_blocks.py <image_path> <json_path> <output_path> [line_thickness]
    # Exit status is 0 when the annotated image was written, 1 otherwise.
    if len(sys.argv) < 4:
        print("用法: python draw_text_blocks.py <image_path> <json_path> <output_path> [line_thickness]")
        print("示例: python draw_text_blocks.py image.png dialogues.json output.png 2")
        sys.exit(1)

    image_path, json_path, output_path = sys.argv[1:4]

    line_thickness = 2
    if len(sys.argv) > 4:
        try:
            line_thickness = int(sys.argv[4])
        except ValueError:
            line_thickness = 0  # routed to the validation error below
        # Only positive thickness produces an outline: OpenCV interprets a
        # negative thickness as "filled", which would paint over the text.
        if line_thickness < 1:
            print(f"[ERROR] line_thickness 必须是正整数: {sys.argv[4]}")
            sys.exit(1)

    success = draw_text_blocks(image_path, json_path, output_path,
                               line_color=(0, 255, 0), line_thickness=line_thickness)

    sys.exit(0 if success else 1)