// Source repository: yichael/AIStoryBoard
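/**
 * Steps:
 * 1. Accept the imagePath parameter (the input image).
 * 2. Accept the textMaskImgPath parameter: where the image with text-region boxes is written.
 * 3. Accept the textBlocksJsonPath parameter: where the text-region JSON is written.
 * 4. Call comic-text-detector to generate the text-region coordinate JSON.
 * 5. Copy imagePath and, on that copy, draw a green outline around every text region listed in the JSON.
 * 6. Save the image with the green outlines to textMaskImgPath.
 */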

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { execFileSync, execSync } from 'child_process';
import { getPythonPath } from './python-path.js';

// ES modules do not define __filename/__dirname, so both are derived from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Parent of this module's directory. Used below to locate the Python scripts
// (under python/generate-anim) and as the working directory of the child processes.
const projectRoot = path.join(__dirname, '..');

/**
 * Assembles one detection preset from the five values that actually vary
 * between presets; the remaining fields are identical everywhere.
 *
 * Field meanings, as noted by the original author:
 * - inputSize: network input size; larger is more accurate but slower.
 * - confThresh: confidence threshold (0-1); higher is stricter.
 * - nmsThresh: NMS threshold (0-1); higher keeps more overlapping boxes.
 * - maskThresh: mask threshold (0-1) for the segmentation network.
 * - act: activation function, 'leaky' or 'relu'.
 * - refineMode: 0 = INPAINT, 1 = ANNOTATION.
 * - keepUndetectedMask: whether to keep undetected regions.
 * - erodeIterations: erosion passes; positive thins the text, negative means dilation.
 * - invertMask: invert the mask (white background, black text).
 *
 * @param {Object} tuning - The per-preset values.
 * @returns {Object} A full preset, with keys in the order the presets have always used.
 */
function buildDetectionPreset({ inputSize, confThresh, nmsThresh, maskThresh, erodeIterations }) {
  return {
    inputSize,
    confThresh,
    nmsThresh,
    maskThresh,
    act: 'leaky',
    refineMode: 0,
    keepUndetectedMask: 0,
    erodeIterations,
    invertMask: 1,
  };
}

/**
 * Named detection presets.
 */
const DETECTION_PRESETS = {
  // Standard configuration (the default).
  standard: buildDetectionPreset({
    inputSize: 1536,
    confThresh: 0.1,
    nmsThresh: 0.1,
    maskThresh: 0.3,
    erodeIterations: 0,
  }),

  // High detail: larger input, lower thresholds, one dilation pass to thicken strokes.
  high_detail: buildDetectionPreset({
    inputSize: 2048,
    confThresh: 0.05,
    nmsThresh: 0.2,
    maskThresh: 0.2,
    erodeIterations: -1,
  }),

  // Ultra detail: keeps as much as possible; slower. Three dilation passes fill holes in glyphs.
  ultra_detail: buildDetectionPreset({
    inputSize: 3072,
    confThresh: 0.01,
    nmsThresh: 0.3,
    maskThresh: 0.05,
    erodeIterations: -3,
  }),

  // Tuned for Japanese (complex kanji and kana): one light dilation pass, moderate thresholds.
  japanese_fine: buildDetectionPreset({
    inputSize: 2048,
    confThresh: 0.08,
    nmsThresh: 0.2,
    maskThresh: 0.2,
    erodeIterations: -1,
  }),

  // Sharpness first: no morphological pass, so the original edge sharpness is kept.
  sharp_detail: buildDetectionPreset({
    inputSize: 2048,
    confThresh: 0.15,
    nmsThresh: 0.15,
    maskThresh: 0.25,
    erodeIterations: 0,
  }),

  // Very sharp text: high thresholds plus one erosion pass.
  ultra_sharp: buildDetectionPreset({
    inputSize: 1536,
    confThresh: 0.3,
    nmsThresh: 0.1,
    maskThresh: 0.4,
    erodeIterations: 1,
  }),

  // Sharpening preset: smaller input to reduce noise, high thresholds, two erosion passes.
  crisp_text: buildDetectionPreset({
    inputSize: 1280,
    confThresh: 0.4,
    nmsThresh: 0.05,
    maskThresh: 0.5,
    erodeIterations: 2,
  }),

  // Strongest sharpening: three erosion passes and the highest thresholds.
  extreme_sharp: buildDetectionPreset({
    inputSize: 1280,
    confThresh: 0.5,
    nmsThresh: 0.03,
    maskThresh: 0.6,
    erodeIterations: 3,
  }),

  // Fine balance: one erosion pass with moderate thresholds, thin but still visible text.
  fine_balance: buildDetectionPreset({
    inputSize: 1536,
    confThresh: 0.3,
    nmsThresh: 0.1,
    maskThresh: 0.4,
    erodeIterations: 1,
  }),

  // Subtle sharpening: one gentle erosion pass.
  subtle_sharp: buildDetectionPreset({
    inputSize: 1536,
    confThresh: 0.25,
    nmsThresh: 0.15,
    maskThresh: 0.35,
    erodeIterations: 1,
  }),

  // Ultra-thin text: two erosion passes while keeping the text visible.
  ultra_thin: buildDetectionPreset({
    inputSize: 1536,
    confThresh: 0.2,
    nmsThresh: 0.12,
    maskThresh: 0.45,
    erodeIterations: 2,
  }),

  // Thinnest text. The original note says 2.5 "will be rounded to 2"; that rounding
  // would happen in the consumer of this value, which is not visible here (TODO confirm).
  super_thin: buildDetectionPreset({
    inputSize: 1536,
    confThresh: 0.15,
    nmsThresh: 0.08,
    maskThresh: 0.45,
    erodeIterations: 2.5,
  }),
};

/**
 * Runs the whole pipeline: detect text regions, save their coordinates as JSON,
 * draw green boxes on a copy of the input image, then verify both outputs exist.
 *
 * @param {string} imagePath - Input image path; must exist.
 * @param {string} textMaskImgPath - Where the image with green boxes is written.
 * @param {string} textBlocksJsonPath - Where the text-region JSON is written.
 * @param {string|Object} [detectionConfig='ultra_thin'] - Preset name (a key of
 *   DETECTION_PRESETS) or a custom configuration object, forwarded to
 *   generateTextRegionsOnly. Defaults to the preset this function always used.
 * @returns {Promise<{textMaskImgPath: string, textBlocksJsonPath: string, success: boolean}>}
 *   The two output paths and `success: true`.
 * @throws {Error} If a required argument is empty, the input image is missing,
 *   or any pipeline step fails (the error is logged and rethrown unchanged).
 */
async function startComicTextDetector(imagePath, textMaskImgPath, textBlocksJsonPath, detectionConfig = 'ultra_thin') {
  try {
    console.log('📖 开始生成带绿色线框的文字区域识别图和坐标JSON');
    console.log(`📷 输入图片: ${imagePath}`);
    console.log(`🎯 输出识别框图: ${textMaskImgPath}`);
    console.log(`📄 输出JSON: ${textBlocksJsonPath}`);

    // Step 1: validate imagePath.
    if (!imagePath) {
      throw new Error('imagePath 参数不能为空');
    }

    if (!fs.existsSync(imagePath)) {
      throw new Error(`图片文件不存在: ${imagePath}`);
    }

    // Step 2: validate textMaskImgPath.
    if (!textMaskImgPath) {
      throw new Error('textMaskImgPath 参数不能为空');
    }

    // Step 3: validate textBlocksJsonPath.
    if (!textBlocksJsonPath) {
      throw new Error('textBlocksJsonPath 参数不能为空');
    }

    // Make sure both output directories exist before anything writes to them.
    const outputDir = path.dirname(textMaskImgPath);
    if (!fs.existsSync(outputDir)) {
      fs.mkdirSync(outputDir, { recursive: true });
    }

    const jsonOutputDir = path.dirname(textBlocksJsonPath);
    if (!fs.existsSync(jsonOutputDir)) {
      fs.mkdirSync(jsonOutputDir, { recursive: true });
    }

    // Step 4: run the detector. The detector's own JSON is written into the
    // mask image's directory, which is why outputDir is passed here.
    console.log('\n🔍 步骤4: 正在调用文字检测器生成文字区域坐标...');
    const textRegions = await generateTextRegionsOnly(imagePath, outputDir, detectionConfig);

    // Step 5: save the coordinates JSON.
    console.log('\n📄 步骤5: 生成坐标JSON文件...');
    await saveTextRegionsJson(textRegions, textBlocksJsonPath);

    // Step 6: draw green boxes on a copy of the original image.
    console.log('\n🎨 步骤6: 在原图片上绘制绿色文字区域识别框...');
    await drawGreenBoxesOnOriginalImage(imagePath, textRegions, textMaskImgPath);

    // Step 7: verify both outputs were written.
    console.log('\n💾 步骤7: 验证文件保存...');
    await verifyMaskSaved(textMaskImgPath);
    await verifyJsonSaved(textBlocksJsonPath);

    console.log('✅ 带绿色线框的文字区域识别图和坐标JSON生成完成');
    return {
      textMaskImgPath,
      textBlocksJsonPath,
      success: true,
    };
  } catch (error) {
    console.error(`❌ 带绿色线框的文字区域识别图生成失败: ${error.message}`);
    throw error;
  }
}

/**
 * Step 4: runs the comic-text-detector Python script and returns the detected
 * text blocks.
 *
 * The script is expected to write `<image base name>_text_regions.json` into
 * `outputDir`; that file is read back and its `text_blocks` array is returned.
 *
 * @param {string} imagePath - Input image path (passed to the script as given).
 * @param {string} outputDir - Directory the script writes its JSON into.
 * @param {string|Object} [detectionConfig='standard'] - Preset name (own key of
 *   DETECTION_PRESETS) or an object whose fields override the standard preset.
 *   Unknown names fall back to the standard preset with a warning.
 * @returns {Promise<Array>} The `text_blocks` array from the generated JSON.
 * @throws {Error} If the Python script is missing, the process exits with an
 *   error, the JSON file is not produced, or it lacks a `text_blocks` array.
 */
async function generateTextRegionsOnly(imagePath, outputDir, detectionConfig = 'standard') {
  const pythonEnv = getPythonPath();
  const pythonScript = path.join(projectRoot, 'python', 'generate-anim', 'detect_comic_text_with_boxes.py');

  // Fail early with a clear message if the Python script is missing.
  if (!fs.existsSync(pythonScript)) {
    throw new Error(`Python脚本不存在: ${pythonScript}`);
  }

  // Resolve the detection parameters.
  let params;
  if (typeof detectionConfig === 'string') {
    // Own-property check: names such as 'constructor' or 'toString' would
    // otherwise resolve to inherited Object members instead of a preset.
    if (Object.prototype.hasOwnProperty.call(DETECTION_PRESETS, detectionConfig)) {
      params = DETECTION_PRESETS[detectionConfig];
      console.log(`📋 使用预设配置: ${detectionConfig}`);
    } else {
      console.warn(`⚠️ 未知的预设配置: ${detectionConfig}，使用标准配置`);
      params = DETECTION_PRESETS.standard;
    }
  } else if (detectionConfig !== null && typeof detectionConfig === 'object') {
    params = { ...DETECTION_PRESETS.standard, ...detectionConfig };
    console.log(`📋 使用自定义配置`);
  } else {
    params = DETECTION_PRESETS.standard;
    console.log(`📋 使用默认标准配置`);
  }

  // Positional arguments in the order the script has always received them.
  // They are passed as an argv array, not a shell string, so paths containing
  // spaces, quotes, `$` or backticks reach the script unchanged.
  const scriptArgs = [
    pythonScript,
    imagePath,
    outputDir,
    projectRoot,
    params.inputSize,
    params.confThresh,
    params.nmsThresh,
    params.maskThresh,
    params.act,
    params.refineMode,
    params.keepUndetectedMask,
    params.erodeIterations,
    params.invertMask,
  ].map(String);

  console.log(`🔍 正在检测图片中的文字区域: ${path.basename(imagePath)}`);
  console.log(`⚙️ 检测参数: 尺寸=${params.inputSize}, 置信度=${params.confThresh}, Mask阈值=${params.maskThresh}, 腐蚀=${params.erodeIterations}`);

  // Blocks until the script exits; a non-zero exit code throws.
  execFileSync(pythonEnv, scriptArgs, {
    stdio: 'inherit',
    cwd: projectRoot,
    env: {
      ...process.env,
      PYTHONIOENCODING: 'utf-8',
      PYTHONUTF8: '1',
    },
  });

  // Locate the JSON the script is expected to have written.
  const baseImageName = path.basename(imagePath, path.extname(imagePath));
  const textRegionsJsonPath = path.join(outputDir, `${baseImageName}_text_regions.json`);

  // Grace period (up to 50 x 100 ms) in case the file is not visible yet.
  let retries = 50;
  while (retries > 0 && !fs.existsSync(textRegionsJsonPath)) {
    await new Promise((resolve) => setTimeout(resolve, 100));
    retries--;
  }

  if (!fs.existsSync(textRegionsJsonPath)) {
    throw new Error(`步骤4失败: 文字区域坐标文件未生成: ${textRegionsJsonPath}`);
  }

  const textRegionsData = JSON.parse(fs.readFileSync(textRegionsJsonPath, 'utf-8'));

  // Report an unexpected file shape explicitly instead of failing with a
  // TypeError on `.length` below.
  if (!textRegionsData || !Array.isArray(textRegionsData.text_blocks)) {
    throw new Error(`步骤4失败: 文字区域坐标文件格式错误（缺少 text_blocks 数组）: ${textRegionsJsonPath}`);
  }

  console.log(`✅ 文字区域检测完成: 检测到 ${textRegionsData.text_blocks.length} 个区域`);
  return textRegionsData.text_blocks;
}

/**
 * Step 5 (alternative path): reads the detector's `<image>_text_regions.json`
 * from `outputDir`, converts every block to the OCR-compatible "dialogues"
 * shape, orders the regions right-to-left then top-to-bottom, and writes the
 * result to `targetJsonPath`.
 *
 * @param {string} imagePath - Original image path; only its base name is used.
 * @param {string} outputDir - Directory holding the detector's JSON output.
 * @param {string} targetJsonPath - Destination of the OCR-compatible JSON.
 * @returns {Promise<Array>} The sorted region objects (same array that is saved).
 * @throws {Error} If the detector JSON does not appear within ~5 seconds.
 */
async function processCoordinatesJson(imagePath, outputDir, targetJsonPath) {
  const stem = path.basename(imagePath, path.extname(imagePath));
  const regionsFile = path.join(outputDir, `${stem}_text_regions.json`);

  // Poll for the detector output: at most 50 waits of 100 ms each.
  for (let attempt = 0; attempt < 50 && !fs.existsSync(regionsFile); attempt += 1) {
    await new Promise((wake) => setTimeout(wake, 100));
  }

  if (!fs.existsSync(regionsFile)) {
    throw new Error(`步骤5失败: 文字区域JSON文件未生成: ${regionsFile}`);
  }

  console.log(`📖 读取文字区域数据: ${path.basename(regionsFile)}`);
  const detected = JSON.parse(fs.readFileSync(regionsFile, 'utf-8'));

  console.log(`🔄 转换为OCR兼容格式...`);
  // Anything other than an array of blocks is treated as "no regions".
  const blocks = Array.isArray(detected.text_blocks) ? detected.text_blocks : [];

  const textRegions = blocks.map((block) => {
    const box = block.bbox;
    return {
      // Corner points: top-left, top-right, bottom-right, bottom-left.
      bbox: [
        [box.x1, box.y1],
        [box.x2, box.y1],
        [box.x2, box.y2],
        [box.x1, box.y2],
      ],
      text: `[文字区域${block.index}]`, // placeholder text
      confidence: 0.95, // fixed value, not taken from the detector output
      source: 'comic-text-detector',
      region_info: {
        width: box.width,
        height: box.height,
        center_x: box.center_x,
        center_y: box.center_y,
        vertical: block.vertical,
        language: block.language,
      },
    };
  });

  console.log(`🔄 对文字区域进行排序（从右到左，从上到下）...`);
  // Centre of a region, from its top-left and bottom-right corners.
  const centreOf = ({ bbox }) => [
    (bbox[0][0] + bbox[2][0]) / 2,
    (bbox[0][1] + bbox[2][1]) / 2,
  ];
  textRegions.sort((left, right) => {
    const [leftX, leftY] = centreOf(left);
    const [rightX, rightY] = centreOf(right);
    // More than 50 px apart horizontally: rightmost first.
    // Otherwise treat them as the same column: topmost first.
    return Math.abs(leftX - rightX) > 50 ? rightX - leftX : leftY - rightY;
  });

  const ocrCompatibleResult = { dialogues: textRegions };

  console.log(`💾 保存OCR兼容JSON: ${path.basename(targetJsonPath)}`);
  fs.writeFileSync(targetJsonPath, JSON.stringify(ocrCompatibleResult, null, 2), 'utf-8');

  console.log(`✅ 转换完成: ${ocrCompatibleResult.dialogues.length} 个文字区域（已按从右到左、从上到下排序）`);
  return textRegions;
}

/**
 * Step 5: converts detected text regions to the OCR-compatible "dialogues"
 * format, orders them right-to-left then top-to-bottom, numbers them, and
 * writes the result as pretty-printed JSON.
 *
 * @param {Array<Object>} textRegions - Detected blocks; each must carry
 *   `bbox: {x1, y1, x2, y2}` and may carry `confidence`.
 * @param {string} textBlocksJsonPath - Destination JSON file. Its directory is
 *   created if it does not exist.
 * @returns {Promise<void>}
 * @throws {TypeError} If `textRegions` is not an array.
 */
async function saveTextRegionsJson(textRegions, textBlocksJsonPath) {
  if (!Array.isArray(textRegions)) {
    throw new TypeError('textRegions 必须是数组');
  }

  console.log('🔄 转换为OCR兼容格式...');

  const dialogues = textRegions.map((block) => {
    const { x1, y1, x2, y2 } = block.bbox;

    return {
      bbox: [x1, y1, x2, y2], // rectangle form
      center: [(x1 + x2) / 2, (y1 + y2) / 2],
      text: '', // filled in later by OCR
      // `??` rather than `||`: a genuine confidence of 0 must not be replaced
      // by the 0.8 fallback, which is meant only for a missing value.
      confidence: block.confidence ?? 0.8,
      region_id: 0, // assigned after sorting
      source: 'comic-text-detector',
    };
  });

  console.log('🔄 对文字区域进行排序（从右到左，从上到下）...');
  dialogues.sort((a, b) => {
    // Centres more than 50 px apart horizontally: rightmost first.
    const xDiff = b.center[0] - a.center[0];
    if (Math.abs(xDiff) > 50) {
      return xDiff;
    }
    // Otherwise treat them as the same column: topmost first.
    return a.center[1] - b.center[1];
  });

  // Number the regions in reading order.
  dialogues.forEach((dialogue, index) => {
    dialogue.region_id = index + 1;
  });

  const resultJson = {
    image_file: '输入图片',
    dialogues,
    total_count: dialogues.length,
    source: 'comic-text-detector',
    processing_time: new Date().toISOString(),
  };

  console.log(`💾 保存OCR兼容JSON: ${path.basename(textBlocksJsonPath)}`);
  // No-op when the directory already exists.
  fs.mkdirSync(path.dirname(textBlocksJsonPath), { recursive: true });
  fs.writeFileSync(textBlocksJsonPath, JSON.stringify(resultJson, null, 2), 'utf-8');

  console.log(`✅ 转换完成: ${dialogues.length} 个文字区域（已按从右到左、从上到下排序）`);
}

/**
 * Step 6: draws a green box for every text region on a copy of the original
 * image by running the drawing Python script.
 *
 * The regions are handed to the script through a temporary JSON file placed
 * next to the output image; the file is removed afterwards, even on failure.
 *
 * @param {string} originalImagePath - Source image path.
 * @param {Array<Object>} textRegions - Regions with `bbox: {x1, y1, x2, y2}`
 *   and optional `index` / `vertical`.
 * @param {string} outputImagePath - Where the annotated image is written.
 * @returns {Promise<void>}
 * @throws {Error} If the Python process exits with a non-zero status.
 */
async function drawGreenBoxesOnOriginalImage(originalImagePath, textRegions, outputImagePath) {
  const pythonEnv = getPythonPath();
  const drawScript = path.join(projectRoot, 'python', 'generate-anim', 'draw_green_boxes_on_original_image.py');

  // Generate the drawing script on first use if it is not on disk.
  if (!fs.existsSync(drawScript)) {
    console.log('📝 创建绘制绿色边框的Python脚本...');
    await createDrawGreenBoxesOnOriginalImageScript(drawScript);
  }

  // Per-call temp file name: a fixed name would let two runs that write to the
  // same directory overwrite or delete each other's region data.
  const tempJsonPath = path.join(
    path.dirname(outputImagePath),
    `temp_text_regions_for_drawing_${process.pid}_${Date.now()}.json`,
  );

  // Convert {x1,y1,x2,y2} into the corner list the script reads:
  // [[x1,y1],[x2,y1],[x2,y2],[x1,y2]].
  const pythonFormatRegions = textRegions.map((region) => ({
    bbox: [
      [region.bbox.x1, region.bbox.y1], // top-left
      [region.bbox.x2, region.bbox.y1], // top-right
      [region.bbox.x2, region.bbox.y2], // bottom-right
      [region.bbox.x1, region.bbox.y2], // bottom-left
    ],
    index: region.index,
    vertical: region.vertical || false,
  }));

  // The child runs with cwd = projectRoot, so every path is made absolute.
  const scriptArgs = [
    drawScript,
    path.resolve(originalImagePath),
    path.resolve(tempJsonPath),
    path.resolve(outputImagePath),
  ];

  console.log(`🎨 在原图片上绘制 ${textRegions.length} 个绿色文字区域识别框...`);

  try {
    fs.writeFileSync(tempJsonPath, JSON.stringify(pythonFormatRegions, null, 2), 'utf-8');

    // argv array instead of a shell string: paths with spaces, quotes, `$` or
    // backticks are passed through untouched and cannot inject commands.
    execFileSync(pythonEnv, scriptArgs, {
      stdio: 'inherit',
      cwd: projectRoot,
      env: {
        ...process.env,
        PYTHONIOENCODING: 'utf-8',
        PYTHONUTF8: '1',
      },
    });

    console.log(`✅ 绿色识别框绘制完成: ${path.basename(outputImagePath)}`);
  } finally {
    // Always remove the temp file.
    if (fs.existsSync(tempJsonPath)) {
      fs.unlinkSync(tempJsonPath);
    }
  }
}

/**
 * Writes the Python helper that draws green boxes on an image to `scriptPath`,
 * creating the parent directory first when it is missing.
 *
 * The generated script takes three arguments (image path, regions JSON path,
 * output path), draws one green rectangle (line width 3) per region with
 * OpenCV, and saves the result PNG-encoded.
 *
 * @param {string} scriptPath - Where the Python script is written.
 * @returns {Promise<void>}
 */
async function createDrawGreenBoxesOnOriginalImageScript(scriptPath) {
  // Written to disk verbatim; the Chinese text inside is part of the generated
  // script and is left untouched.
  const pythonSource = `#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
在遮罩图上绘制绿色文字区域边框
"""
import cv2
import json
import sys
from pathlib import Path
import numpy as np

def draw_green_boxes_on_original_image(image_path, regions_json_path, output_path):
    """
    在原图片上绘制绿色边框（支持中文路径）
    """
    # 读取原图片（支持中文路径）
    image_data = np.fromfile(str(image_path), dtype=np.uint8)
    image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError(f"无法读取图片: {image_path}")
    
    print(f"[INFO] 图片尺寸: {image.shape[1]}x{image.shape[0]}")
    
    # 读取文字区域JSON
    with open(regions_json_path, 'r', encoding='utf-8') as f:
        text_regions = json.load(f)
    
    print(f"[INFO] 需要绘制 {len(text_regions)} 个绿色边框")
    
    # 绘制每个文字区域的绿色边框
    for i, region in enumerate(text_regions):
        bbox = region['bbox']
        # bbox格式: [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
        x1, y1 = int(bbox[0][0]), int(bbox[0][1])
        x2, y2 = int(bbox[2][0]), int(bbox[2][1])
        
        # 绘制绿色矩形框
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 3)  # 绿色，线宽3
        
        # 移除了编号相关逻辑
        
        print(f"[INFO] 绘制区域 {i+1}: ({x1},{y1}) -> ({x2},{y2})")
    
    # 保存结果（支持中文路径）
    success, encoded_img = cv2.imencode('.png', image)
    if success:
        encoded_img.tofile(str(output_path))
        print(f"[SUCCESS] 已保存带绿色边框的图片: {output_path}")
    else:
        raise RuntimeError(f"保存图片失败: {output_path}")

def main():
    if len(sys.argv) != 4:
        print("用法: python draw_green_boxes_on_original_image.py <原图片路径> <区域JSON路径> <输出图片路径>")
        sys.exit(1)
    
    image_path = Path(sys.argv[1])
    regions_json_path = Path(sys.argv[2])
    output_path = Path(sys.argv[3])
    
    try:
        draw_green_boxes_on_original_image(image_path, regions_json_path, output_path)
    except Exception as e:
        print(f"[ERROR] 绘制失败: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
`;

  // Create the target directory only when it is absent.
  const targetDir = path.dirname(scriptPath);
  const targetDirMissing = !fs.existsSync(targetDir);
  if (targetDirMissing) {
    fs.mkdirSync(targetDir, { recursive: true });
  }

  fs.writeFileSync(scriptPath, pythonSource, 'utf-8');

  const scriptName = path.basename(scriptPath);
  console.log(`✅ Python绘制脚本已创建: ${scriptName}`);
}

/**
 * Step 7: confirms the image with green boxes exists at `textMaskImgPath` and
 * is not empty. Waits up to 50 x 100 ms for the file to appear.
 *
 * @param {string} textMaskImgPath - Path of the annotated image.
 * @returns {Promise<void>}
 * @throws {Error} If the file never appears or has a size of zero bytes.
 */
async function verifyMaskSaved(textMaskImgPath) {
  const pause = (ms) => new Promise((done) => setTimeout(done, ms));

  // Poll until the file shows up or the attempts run out.
  for (let remaining = 50; remaining > 0 && !fs.existsSync(textMaskImgPath); remaining -= 1) {
    await pause(100);
  }

  if (!fs.existsSync(textMaskImgPath)) {
    throw new Error(`步骤7失败: 带绿色边框的图片未保存到指定路径: ${textMaskImgPath}`);
  }

  // A zero-byte file counts as a failed write.
  const { size } = fs.statSync(textMaskImgPath);
  if (size === 0) {
    throw new Error(`步骤7失败: 生成的图片文件为空: ${textMaskImgPath}`);
  }

  const sizeInKb = Math.round(size / 1024);
  console.log(`✅ 步骤7完成: 带绿色边框的图片已保存到 ${path.basename(textMaskImgPath)} (${sizeInKb}KB)`);
}

/**
 * Step 7: confirms the coordinates JSON exists at `jsonPath`, parses, and
 * contains a `dialogues` array.
 *
 * @param {string} jsonPath - Path of the JSON file to check.
 * @returns {Promise<void>}
 * @throws {Error} If the file is missing, or if it cannot be read/parsed or
 *   has no `dialogues` array. In the latter cases the underlying error is
 *   attached as `cause`.
 */
async function verifyJsonSaved(jsonPath) {
  if (!fs.existsSync(jsonPath)) {
    throw new Error(`步骤7失败: 坐标JSON未保存到指定路径: ${jsonPath}`);
  }

  let data;
  try {
    data = JSON.parse(fs.readFileSync(jsonPath, 'utf-8'));

    // `?.` so that a document such as `null` is reported as a format error
    // instead of surfacing a raw TypeError message.
    if (!Array.isArray(data?.dialogues)) {
      throw new Error('JSON格式不正确');
    }
  } catch (error) {
    // Keep the original error (read, parse or format) reachable via `cause`.
    throw new Error(`步骤7失败: 坐标JSON文件格式错误: ${error.message}`, { cause: error });
  }

  const regionCount = data.dialogues.length;
  console.log(`✅ 步骤7完成: 坐标JSON已保存到 ${path.basename(jsonPath)} (${regionCount}个区域)`);
}

export { startComicTextDetector, DETECTION_PRESETS };