yichael
/
AIStoryBoard


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798
							/**
 * 步骤：
 * 1. 创建startOcr()函数
 * 2. 创建变量cutDialogBlockImgNameArr 用来接收外部传入的图片路径数组
 * 3. 遍历cutDialogBlockImgNameArr，依次调用ocrWithOnnxOCR()对图片中的文字进行识别，根据图片所在路径保存识别结果json文件，json文件名称与图片名称相同（仅扩展名不同），json保存位置与图片所在文件夹相同
 * 4. 返回识别结果json文件路径数组
*/

import { execSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { sortDialoguesByPanels } from './sort-dialog.js';
import { getPythonPath as getPythonPathFromModule } from './python-path.js';
// import { start as startDialogBlockReg } from './dialog-block-reg.js'; // 文件不存在，暂时注释
// import { sortSentenceCharacters } from './sort-sentence-character.js'; // 未使用，暂时注释

// ES modules do not provide __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Resolve the project root directory.
 *
 * @returns {string} The parent of the directory that contains this module.
 */
function getProjectRoot() {
  const parentSegment = '..';
  const projectRoot = path.join(__dirname, parentSegment);
  return projectRoot;
}

/**
 * Run the OnnxOCR Python script on a single image.
 *
 * The script is invoked synchronously; nothing is returned. Any result files
 * are written by the Python script itself.
 *
 * @param {string} imagePath - Path of the image to recognise.
 * @param {string} outputDir - Directory passed to the script as its output directory.
 * @throws {Error} If the Python script file is missing or the command fails.
 */
function ocrWithOnnxOCR(imagePath, outputDir) {
  const rootDir = getProjectRoot();
  // Interpreter path is supplied by ./python-path.js.
  const interpreter = getPythonPathFromModule(rootDir);
  const scriptFile = path.join(rootDir, 'python', 'generate-anim', 'ocr_with_onnxocr.py');

  const scriptMissing = !fs.existsSync(scriptFile);
  if (scriptMissing) {
    throw new Error(`OnnxOCR脚本不存在: ${scriptFile}`);
  }

  // CLI shape: python ocr_with_onnxocr.py <image_path> <text_mask_path> <output_dir> [config_json]
  // The text mask path is passed as an empty string and the config as an empty JSON object.
  const commandLine = [interpreter, scriptFile, imagePath, '', outputDir, '{}']
    .map((part) => `"${part}"`)
    .join(' ');

  console.log(`   🔍 正在使用OnnxOCR识别文字: ${path.basename(imagePath)}`);

  const execOptions = {
    encoding: 'utf-8',
    stdio: 'pipe',
    cwd: rootDir,
    // Ask Python to use UTF-8 for its standard streams.
    env: {
      ...process.env,
      PYTHONIOENCODING: 'utf-8',
      PYTHONUTF8: '1'
    },
    shell: true
  };

  try {
    execSync(commandLine, execOptions);
  } catch (error) {
    const reason = error.message;
    console.error(`   ❌ OnnxOCR识别失败: ${reason}`);
    throw new Error(`OnnxOCR识别失败: ${reason}`);
  }
}

/**
 * Run OCR over a list of images and collect the JSON result file of each one.
 *
 * For every image that exists on disk, ocrWithOnnxOCR() is called with the
 * image's own directory as output directory. Afterwards the function looks for
 * `<imageName>.json` or `<imageName>_dialogues.json` in that directory; the
 * latter is rewritten as `<imageName>.json` (adding `image_name` when absent)
 * and the old file is deleted. Images that do not exist, or for which no JSON
 * file is found, are logged and skipped.
 *
 * @param {Array<string>} cutDialogBlockImgNameArr - Image paths supplied by the caller.
 * @returns {Promise<Array>} Paths of the `<imageName>.json` files that were found or created.
 * @throws {Error} If the argument is not a non-empty array, or if OCR / file handling fails.
 */
async function startOcr(cutDialogBlockImgNameArr) {
  try {
    console.log('🚀 开始OCR识别流程...');

    // Step 2: validate the caller-supplied list of image paths.
    console.log('\n📋 步骤2: 验证图片路径数组参数');
    if (!Array.isArray(cutDialogBlockImgNameArr)) {
      throw new Error('步骤2失败: cutDialogBlockImgNameArr 必须是一个数组');
    }
    const totalImages = cutDialogBlockImgNameArr.length;
    if (totalImages === 0) {
      throw new Error('步骤2失败: cutDialogBlockImgNameArr 不能为空数组');
    }
    console.log(`✅ 图片路径数组长度: ${totalImages}`);

    // Step 3: recognise each image in turn.
    console.log('\n🔤 步骤3: 开始遍历图片数组进行OCR识别...');
    const collectedJsonPaths = [];

    for (const [position, imagePath] of cutDialogBlockImgNameArr.entries()) {
      console.log(`\n   🔍 [${position + 1}/${totalImages}] 处理图片: ${path.basename(imagePath)}`);

      if (!fs.existsSync(imagePath)) {
        console.log(`   ⚠️ 图片文件不存在，跳过: ${imagePath}`);
        continue;
      }

      // The JSON lives next to the image and shares its base name.
      const imageDir = path.dirname(imagePath);
      const extension = path.extname(imagePath);
      const imageName = path.basename(imagePath, extension);
      const targetJsonName = `${imageName}.json`;
      const targetJsonPath = path.join(imageDir, targetJsonName);

      console.log(`   📄 JSON将保存到: ${targetJsonName}`);

      ocrWithOnnxOCR(imagePath, imageDir);

      // Accept either naming scheme; the expected name takes precedence.
      const candidates = [
        targetJsonPath,
        path.join(imageDir, `${imageName}_dialogues.json`)
      ];
      const producedPath = candidates.find((candidate) => fs.existsSync(candidate));

      if (!producedPath) {
        console.log(`   ⚠️ JSON文件未生成: ${targetJsonPath}`);
        continue;
      }

      if (producedPath !== targetJsonPath) {
        // Re-save under the expected name, making sure the image name is recorded.
        const payload = JSON.parse(fs.readFileSync(producedPath, 'utf-8'));
        if (!payload.image_name) {
          payload.image_name = imageName;
        }
        fs.writeFileSync(targetJsonPath, JSON.stringify(payload, null, 2), 'utf-8');
        fs.unlinkSync(producedPath);
        console.log(`   📝 已重命名JSON文件: ${path.basename(producedPath)} -> ${targetJsonName}`);
      }

      collectedJsonPaths.push(targetJsonPath);
      const { size } = fs.statSync(targetJsonPath);
      console.log(`   ✅ OCR识别完成: ${targetJsonName} (${Math.round(size / 1024)}KB)`);
    }

    // Step 4: report and return the collected JSON paths.
    const producedCount = collectedJsonPaths.length;
    console.log('\n📋 步骤4: 准备返回识别结果JSON文件路径数组...');
    console.log(`📄 JSON文件路径列表 (${producedCount} 个):`);
    for (const [listIndex, jsonPath] of collectedJsonPaths.entries()) {
      console.log(`   ${listIndex + 1}. ${path.basename(jsonPath)}`);
    }
    console.log(`✅ 步骤4完成: JSON文件路径数组已准备就绪 (${producedCount} 个路径)`);

    console.log('\n🎉 所有步骤完成！');
    console.log(`📊 共处理 ${totalImages} 个图片`);
    console.log(`📊 共生成 ${producedCount} 个JSON文件`);

    return collectedJsonPaths;
  } catch (error) {
    console.error(`\n❌ OCR识别失败: ${error.message}`);
    throw error;
  }
}

/**
 * Deprecated entry point for OCR plus text-region detection.
 *
 * Region detection is now expected to be done by comic-text-detector and text
 * recognition by startOcr(). This function only validates its arguments,
 * resolves relative paths against the project root, makes sure the output
 * directory exists, logs a deprecation notice and then always throws.
 *
 * The text-block JSON generation that used to follow the deprecation throw was
 * unreachable (and referenced undeclared variables), so it has been removed.
 *
 * @param {string} imagePath - Image path; a relative path is resolved against projectRoot.
 * @param {string} outputDir - Output directory; a relative path is resolved against
 *   projectRoot. It is created if it does not exist.
 * @param {string} projectRoot - Project root (optional; defaults to getProjectRoot()).
 * @returns {Promise<never>} Never resolves successfully; the returned promise always rejects.
 * @throws {Error} If an argument is empty, the image does not exist, or (otherwise)
 *   because the function is deprecated.
 */
async function start(imagePath, outputDir, projectRoot = null) {
  try {
    if (!imagePath) {
      throw new Error('imagePath 参数不能为空');
    }
    if (!outputDir) {
      throw new Error('outputDir 参数不能为空');
    }

    const rootDir = projectRoot || getProjectRoot();

    // Relative paths are interpreted relative to the project root.
    const resolvedImagePath = path.isAbsolute(imagePath)
      ? imagePath
      : path.resolve(rootDir, imagePath);
    const resolvedOutputDir = path.isAbsolute(outputDir)
      ? outputDir
      : path.resolve(rootDir, outputDir);

    if (!fs.existsSync(resolvedImagePath)) {
      throw new Error(`图片文件不存在: ${resolvedImagePath}`);
    }

    // Kept for backward compatibility: the output directory is still created
    // before the deprecation error is raised.
    if (!fs.existsSync(resolvedOutputDir)) {
      fs.mkdirSync(resolvedOutputDir, { recursive: true });
    }

    console.log('='.repeat(60));
    console.log('🔤 OCR识别漫画图片中的对白文字');
    console.log('='.repeat(60));
    console.log(`📷 图片路径: ${resolvedImagePath}`);
    console.log(`📂 输出目录: ${resolvedOutputDir}`);

    console.log('\n🔤 步骤4: 调用ocrComicImage()函数识别漫画图片中的对白文字，计算文字区域面积，生成定位文件...');

    console.log(`⚠️  start()函数已废弃，区域检测应由comic-text-detector完成`);
    console.log(`⚠️  如需进行OCR识别，请使用startOcr()函数`);
    throw new Error('start()函数已废弃，请使用comic-text-detector进行区域检测，使用startOcr()进行OCR识别');
  } catch (error) {
    console.error(`\n❌ 处理失败: ${error.message}`);
    if (error.stack) {
      console.error(error.stack);
    }
    throw error;
  }
}

/**
 * Run the comic detection/OCR Python script on one image and return the
 * recognised dialogues in a normalised shape.
 *
 * After the script finishes, `<imageName>_dialogues.json` is read from
 * outputDir (or from the image's directory when outputDir is not given). If a
 * `tmp/<imageName>_panels.json` file exists next to it and contains panels,
 * the dialogues that carry a bbox are ordered with sortDialoguesByPanels().
 * The normalised result is written back over the dialogues JSON file.
 *
 * @param {string} imagePath - Path of the image to process.
 * @param {string} outputDir - Output directory (optional); passed to the script with `-o`.
 * @param {boolean} useBubbleDetection - Currently not read by this function.
 * @param {boolean} useOptimized - Currently not read by this function.
 * @param {string} projectRoot - Project root (optional; defaults to the parent of this module's directory).
 * @param {string} pythonScript - Script path (optional; defaults to python/generate-anim/detect_and_ocr_comic.py under projectRoot).
 * @param {string} pythonEnv - Python executable (optional; defaults to getPythonPath(projectRoot)).
 * @returns {Object} `{ text_count, texts, dialogues, reading_order }`.
 * @throws {Error} If the script fails, the dialogues JSON is missing, or it cannot be parsed.
 */
function ocrComicImage(imagePath, outputDir = null, useBubbleDetection = true, useOptimized = true, projectRoot = null, pythonScript = null, pythonEnv = null) {
  try {
    if (!projectRoot) {
      projectRoot = path.join(__dirname, '..');
    }
    if (!pythonScript) {
      pythonScript = path.join(projectRoot, 'python', 'generate-anim', 'detect_and_ocr_comic.py');
    }
    if (!pythonEnv) {
      pythonEnv = getPythonPath(projectRoot);
    }

    let command = `"${pythonEnv}" "${pythonScript}" "${imagePath}"`;
    if (outputDir) {
      command += ` -o "${outputDir}"`;
    }

    console.log(`🔍 正在检测文字区域并识别文字: ${path.basename(imagePath)}`);

    execSync(command, {
      encoding: 'utf-8',
      stdio: 'inherit',
      cwd: projectRoot,
      env: { ...process.env }
    });

    // Result files are looked up in outputDir when given, otherwise next to the image.
    const imageName = path.basename(imagePath, path.extname(imagePath));
    const resultDir = outputDir || path.dirname(imagePath);
    const jsonPath = path.join(resultDir, `${imageName}_dialogues.json`);

    if (!fs.existsSync(jsonPath)) {
      throw new Error(`OCR结果文件不存在: ${jsonPath}`);
    }

    const ocrResult = JSON.parse(fs.readFileSync(jsonPath, 'utf-8'));

    // Optional panel layout from the tmp sub-directory.
    const panelsJsonPath = path.join(resultDir, 'tmp', `${imageName}_panels.json`);
    let panels = [];
    if (fs.existsSync(panelsJsonPath)) {
      const panelsData = JSON.parse(fs.readFileSync(panelsJsonPath, 'utf-8'));
      panels = panelsData.panels || [];
    }

    // Image size handed to the panel sort: fixed defaults, enlarged only if the
    // first dialogue's bbox extends beyond them.
    let imageWidth = 1334;
    let imageHeight = 1940;
    const firstDialogue = ocrResult.dialogues && ocrResult.dialogues.length > 0
      ? ocrResult.dialogues[0]
      : null;
    if (firstDialogue && firstDialogue.bbox) {
      imageWidth = Math.max(imageWidth, firstDialogue.bbox.x2 || 1334);
      imageHeight = Math.max(imageHeight, firstDialogue.bbox.y2 || 1940);
    }

    console.log(`📋 步骤1: 排序对话（基于格子位置）...`);
    let sortedDialogues = ocrResult.dialogues || [];

    if (panels.length > 0 && sortedDialogues.length > 0) {
      // NOTE(review): when panel sorting runs, dialogues without a bbox are not
      // passed to the sorter and therefore do not appear in the result — confirm
      // this is intended.
      const dialoguesWithBbox = sortedDialogues.filter(d => d.bbox);
      if (dialoguesWithBbox.length > 0) {
        sortedDialogues = sortDialoguesByPanels(
          dialoguesWithBbox,
          panels,
          imageWidth,
          imageHeight
        );
      }
    }

    // Per-dialogue character sorting is currently skipped; the OCR text is used as-is.
    console.log(`📝 步骤2: 排序每个对话内的字符...`);

    const finalDialogues = sortedDialogues.map((dialogue, index) => {
      const originalText = dialogue.text;

      // NOTE(review): looks like a leftover debug trace for specific sample text — consider removing.
      if (originalText && (originalText.includes('远道') || originalText.includes('石田'))) {
        console.log(`  [DEBUG ocr.js] 对话${dialogue.order || index + 1}: OCR原始文本="${originalText}"`);
      }

      return {
        order: dialogue.order || index + 1,
        text: originalText,
        bbox: dialogue.bbox || null,
        character_positions: dialogue.character_positions || null
      };
    });

    const finalResult = {
      image_file: ocrResult.image_file || path.basename(imagePath),
      reading_order: '从右到左、从上到下（日式漫画阅读顺序）',
      dialogues: finalDialogues,
      total_count: finalDialogues.length
    };

    // Overwrite the script's JSON with the normalised result.
    fs.writeFileSync(jsonPath, JSON.stringify(finalResult, null, 2), 'utf-8');
    console.log(`✅ 已保存排序后的对话结果: ${jsonPath}`);

    const result = {
      text_count: finalResult.total_count,
      texts: finalDialogues.map(d => d.text),
      dialogues: finalDialogues,
      reading_order: finalResult.reading_order
    };

    console.log(`✅ OCR识别完成: 识别到 ${result.text_count} 段文字`);
    return result;
  } catch (error) {
    console.error(`❌ OCR识别失败: ${error.message}`);
    throw error;
  }
}

/**
 * Run the PaddleOCR text-detection Python script on an image and convert the
 * resulting `<name>_text_blocks.json` into the dialogues result shape.
 *
 * A trailing `_text_mask` in the image's base name is stripped before looking
 * up the JSON file in outputDir.
 *
 * @param {string} imagePath - Path of the image to run detection on.
 * @param {string} textMaskPath - Text mask path (currently not read by this function).
 * @param {string} outputDir - Directory the script writes to and the JSON is read from.
 * @param {string} projectRoot - Project root; used to locate the script and as the working directory.
 * @returns {Promise<Object>} `{ text_count, texts, dialogues, reading_order }`.
 * @throws {Error} If the script fails or the text-blocks JSON does not appear.
 */
async function ocrWithPaddleOcr(imagePath, textMaskPath, outputDir, projectRoot) {
  try {
    const pythonEnv = getPythonPathFromModule(projectRoot);
    const detectionScript = path.join(projectRoot, 'python', 'generate-anim', 'paddleocr_text_detection.py');

    console.log(`🔍 使用PaddleOCR文本检测模块检测文字区域: ${path.basename(imagePath)}`);
    console.log(`📷 检测图片路径: ${imagePath}`);
    console.log(`📂 输出目录: ${outputDir}`);

    // The script is given absolute paths; the trailing 0.5 is passed through as
    // its third argument. NOTE(review): presumably a detection threshold — confirm
    // against the script.
    const absImagePath = path.resolve(imagePath);
    const absOutputDir = path.resolve(outputDir);
    const command = `"${pythonEnv}" "${detectionScript}" "${absImagePath}" "${absOutputDir}" 0.5`;
    execSync(command, {
      encoding: 'utf-8',
      stdio: 'inherit',
      cwd: projectRoot,
      env: { ...process.env, PYTHONIOENCODING: 'utf-8' }
    });

    const imageName = path.basename(imagePath, path.extname(imagePath));
    const baseImageName = imageName.replace(/_text_mask$/, '');
    const textBlocksJsonPath = path.join(outputDir, `${baseImageName}_text_blocks.json`);

    // Poll for the result file (up to 50 × 100 ms). The wait is awaited rather
    // than spun, so the event loop is not blocked while waiting.
    for (let retries = 50; retries > 0 && !fs.existsSync(textBlocksJsonPath); retries--) {
      await new Promise((resolve) => setTimeout(resolve, 100));
    }

    if (!fs.existsSync(textBlocksJsonPath)) {
      throw new Error(`文字块区域JSON文件未生成: ${textBlocksJsonPath}`);
    }

    const ocrResult = JSON.parse(fs.readFileSync(textBlocksJsonPath, 'utf-8'));

    // Map text blocks onto the dialogue shape used by the other OCR helpers.
    const dialogues = (ocrResult.text_blocks || []).map((block, index) => ({
      order: block.order || block.block_index || index + 1,
      text: block.text || '',
      bbox: block.bbox || null
    }));

    return {
      text_count: ocrResult.total_count || dialogues.length,
      texts: dialogues.map(d => d.text),
      dialogues: dialogues,
      reading_order: '从右到左、从上到下（日式漫画阅读顺序）'
    };
  } catch (error) {
    console.error(`❌ PaddleOCR文本检测失败: ${error.message}`);
    throw error;
  }
}

/**
 * Build a dialogues-style result from a text_detection.json file.
 *
 * Only the `text_blocks` array of the JSON is used. Each block becomes a
 * dialogue with a 1-based `order`, an empty `text` (the detection file carries
 * no recognised text) and the block's `bbox` (or null). No file is written.
 *
 * @param {string} imagePath - Image path (currently not read by this function).
 * @param {string} textDetectionJsonPath - Path of the text_detection.json file to read.
 * @param {string} outputDir - Output directory (currently not read by this function).
 * @param {string} projectRoot - Project root (currently not read by this function).
 * @returns {Promise<Object>} `{ text_count, texts, dialogues, reading_order }`.
 * @throws {Error} If the JSON file cannot be read or parsed.
 */
async function ocrFromTextDetection(imagePath, textDetectionJsonPath, outputDir, projectRoot) {
  try {
    const textDetectionData = JSON.parse(fs.readFileSync(textDetectionJsonPath, 'utf-8'));
    const textBlocks = textDetectionData.text_blocks || [];

    console.log(`📖 从text_detection.json读取到 ${textBlocks.length} 个文字块`);

    const dialogues = textBlocks.map((block, index) => ({
      order: index + 1,
      text: '', // the detection file has no text content
      bbox: block.bbox || null
    }));

    return {
      text_count: dialogues.length,
      texts: dialogues.map(d => d.text),
      dialogues: dialogues,
      reading_order: '从右到左、从上到下（日式漫画阅读顺序）'
    };
  } catch (error) {
    console.error(`❌ 从text_detection.json生成dialogues失败: ${error.message}`);
    throw error;
  }
}

/**
 * Check whether the given Python interpreter can import the required packages.
 *
 * Runs a one-line `python -c` command that imports torch, cv2, numpy and
 * onnxocr.onnx_paddleocr, with a 10-second timeout. A clean exit counts as
 * success; any failure (non-zero exit, timeout, interpreter not found) counts
 * as "not installed".
 *
 * @param {string} pythonPath - Path of the Python executable.
 * @returns {boolean} true if the command completed without error, false otherwise.
 */
function checkPythonDependencies(pythonPath) {
  const checkCommand = `"${pythonPath}" -c "import torch; import cv2; import numpy; from onnxocr import onnx_paddleocr; print('OK')"`;
  try {
    execSync(checkCommand, {
      encoding: 'utf-8',
      stdio: 'pipe',
      timeout: 10000,
      windowsHide: true
    });
    // execSync throws on failure, so reaching this line means the imports succeeded.
    return true;
  } catch {
    // Deliberately best-effort: any failure is reported as "dependencies missing".
    return false;
  }
}

/**
 * Look up the Python executable for the project.
 *
 * Thin wrapper that forwards to the `getPythonPath` helper imported from
 * ./python-path.js.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {string} Whatever path the imported helper returns for this root.
 */
function getPythonPath(projectRoot) {
  const interpreterPath = getPythonPathFromModule(projectRoot);
  return interpreterPath;
}

/**
 * Run ocrComicImage() on every image file found directly inside a directory.
 *
 * Files are selected by extension (jpg, jpeg, png, bmp, webp; case-insensitive)
 * and processed in default-sorted name order. A failure on one image is logged
 * and does not stop the batch.
 *
 * @param {string} imageDir - Directory to scan for images.
 * @param {string} outputDir - Output directory forwarded to ocrComicImage() (optional).
 * @returns {Array} Results of the images that were processed successfully.
 * @throws {Error} If the directory cannot be read.
 */
function ocrComicImages(imageDir, outputDir = null) {
  try {
    const imagePattern = /\.(jpg|jpeg|png|bmp|webp)$/i;
    const imageFiles = fs.readdirSync(imageDir).filter((entry) => imagePattern.test(entry));
    imageFiles.sort();

    const collected = [];
    imageFiles.forEach((imageFile) => {
      const fullPath = path.join(imageDir, imageFile);
      try {
        collected.push(ocrComicImage(fullPath, outputDir));
      } catch (error) {
        console.error(`❌ 处理 ${imageFile} 失败: ${error.message}`);
      }
    });

    console.log(`✅ 批量OCR识别完成: 共处理 ${collected.length} 张图片`);
    return collected;
  } catch (error) {
    console.error(`❌ OCR识别失败: ${error.message}`);
    throw error;
  }
}

/**
 * Load and parse an OCR JSON file.
 *
 * @param {string} jsonPath - Path of the JSON file.
 * @returns {Object} The parsed content, or null if the file does not exist
 *   or cannot be read/parsed (the failure is logged).
 */
function readOcrJson(jsonPath) {
  let parsed = null;
  try {
    if (fs.existsSync(jsonPath)) {
      const rawText = fs.readFileSync(jsonPath, 'utf-8');
      parsed = JSON.parse(rawText);
    }
  } catch (error) {
    console.error(`❌ 读取OCR JSON失败: ${error.message}`);
    parsed = null;
  }
  return parsed;
}

// Public API of this module
export { start, startOcr, ocrComicImage, ocrComicImages, readOcrJson };

// The auto-run test code has been removed.
// Region detection is now handled by comic-text-detector; OCR is no longer used for region detection.