| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271 |
- /**
- * 批量处理漫画图片,生成对白JSON文件
- * 处理指定目录下的所有图片
- */
- import fs from 'fs';
- import path from 'path';
- import { fileURLToPath } from 'url';
- import { detectComicPanels, mergePanelMaskWithTextMask } from '../check-reg.js';
- import { ocrComicImage } from '../ocr.js';
- import { sortDialoguesByPanels } from '../sort-dialog.js';
- import { sortSentenceCharacters } from '../sort-sentence-character.js';
- import { generateDialogJson } from '../generate-dialog-json.js';
- // ES modules do not provide __filename/__dirname, so derive both from this module's URL
- const __filename = fileURLToPath(import.meta.url);
- const __dirname = path.dirname(__filename);
/**
 * Resolve the project root directory.
 *
 * @returns {string} The directory two levels above the one that contains this module.
 */
function getProjectRoot() {
  const twoLevelsUp = path.join('..', '..');
  return path.join(__dirname, twoLevelsUp);
}
/**
 * Read the panel list from a `<image>_panels.json` file.
 *
 * @param {string} panelsJsonPath - Path of the panels JSON file.
 * @returns {Array|null} The file's `panels` value (an empty array when the key is
 *   missing or falsy), or `null` when the file does not exist.
 * @throws {SyntaxError} When the file exists but does not contain valid JSON.
 */
function readPanelsFile(panelsJsonPath) {
  if (!fs.existsSync(panelsJsonPath)) {
    return null;
  }
  const parsed = JSON.parse(fs.readFileSync(panelsJsonPath, 'utf-8'));
  return parsed.panels || [];
}

/**
 * Put the OCR dialogues into panel reading order.
 *
 * @param {Array<object>} dialogues - Non-empty list of OCR dialogues.
 * @param {Array<object>} panels - Detected panels; may be empty.
 * @returns {{orderedDialogues: Array<object>, imageHeight: number}} The ordered
 *   dialogues and the page height estimate that was handed to the panel sorter.
 */
function orderDialoguesByPanels(dialogues, panels) {
  // The real image size is not read here: start from a fixed fallback size and
  // only grow it when the first dialogue's bounding box reaches further.
  // NOTE(review): only the first bbox is consulted, so this is a lower bound.
  let imageWidth = 1334;
  let imageHeight = 1940;
  const firstBbox = dialogues[0].bbox;
  if (firstBbox) {
    imageWidth = Math.max(imageWidth, firstBbox.x2 || 1334);
    imageHeight = Math.max(imageHeight, firstBbox.y2 || 1940);
  }

  let orderedDialogues = dialogues;
  if (panels.length > 0) {
    const positioned = dialogues.filter((dialogue) => dialogue.bbox);
    if (positioned.length > 0) {
      // Only dialogues with a bbox can be placed into a panel. The remaining
      // ones are appended instead of being dropped, so no recognised text is
      // lost; their `order` is cleared so they are numbered by final position.
      const unpositioned = dialogues
        .filter((dialogue) => !dialogue.bbox)
        .map((dialogue) => ({ ...dialogue, order: undefined }));
      orderedDialogues = [
        ...sortDialoguesByPanels(positioned, panels, imageWidth, imageHeight),
        ...unpositioned,
      ];
    }
  }

  return { orderedDialogues, imageHeight };
}

/**
 * Re-order the characters inside every dialogue and build the output records.
 *
 * @param {Array<object>} orderedDialogues - Dialogues in their final reading order.
 * @param {number} baseImageHeight - Page height estimate to start from.
 * @returns {Array<{order: number, text: string, bbox: (object|null), character_positions: (Array|null)}>}
 *   One record per input dialogue, in the same order.
 */
function buildFinalDialogues(orderedDialogues, baseImageHeight) {
  // The page must be at least as tall as the lowest bounding-box edge seen.
  const lowestEdge = orderedDialogues.reduce(
    (lowest, dialogue) =>
      (dialogue.bbox && dialogue.bbox.y2 ? Math.max(lowest, dialogue.bbox.y2) : lowest),
    -Infinity,
  );
  const pageHeight = lowestEdge > 0 ? Math.max(baseImageHeight, lowestEdge) : baseImageHeight;

  return orderedDialogues.map((dialogue, index) => {
    const { text, bbox, character_positions: characterPositions } = dialogue;

    let sortedText = text;
    if (characterPositions && characterPositions.length > 0) {
      // Prefer per-character coordinates when the OCR result provides them.
      sortedText = sortSentenceCharacters(text, bbox, characterPositions, pageHeight);
    } else if (bbox) {
      // Otherwise fall back to sorting with the dialogue's bbox only.
      sortedText = sortSentenceCharacters(text, bbox, null, pageHeight);
    }

    return {
      order: dialogue.order || index + 1,
      text: sortedText,
      bbox: bbox || null,
      character_positions: characterPositions || null,
    };
  });
}

/**
 * Process a single comic page: detect panels, run OCR, order the dialogues and
 * write `<image>_dialogues.json` into an `ocr` directory next to the image.
 * Intermediate files are read from `ocr/tmp`.
 *
 * Errors are never thrown to the caller; they are logged and reported in the
 * returned object.
 *
 * @param {string} imagePath - Path of the image to process.
 * @param {string} projectRoot - Project root, passed through to the detection,
 *   OCR and mask-merging helpers.
 * @param {boolean} [forceReprocess=false] - Process the image even if its
 *   dialogue JSON already exists.
 * @returns {{success: boolean, skipped?: boolean, dialogues?: number, error?: string}}
 *   `skipped: true` when an existing result was kept, `dialogues` (a count) on
 *   success, `error` (a message) on failure.
 */
function processSingleImage(imagePath, projectRoot, forceReprocess = false) {
  try {
    const imageFileName = path.basename(imagePath);
    const imageName = path.basename(imagePath, path.extname(imagePath));
    const outputDir = path.join(path.dirname(imagePath), 'ocr');
    const tmpDir = path.join(outputDir, 'tmp');

    // tmpDir lives inside outputDir, so one recursive mkdir creates both and is
    // a no-op when they already exist.
    fs.mkdirSync(tmpDir, { recursive: true });

    // Keep an existing result unless the caller asked for reprocessing.
    const outputJsonPath = path.join(outputDir, `${imageName}_dialogues.json`);
    if (!forceReprocess && fs.existsSync(outputJsonPath)) {
      console.log(`⏭️ 跳过 ${imageFileName} (已存在结果文件)`);
      return { success: true, skipped: true };
    }

    console.log(`\n${'='.repeat(60)}`);
    console.log(`📷 正在处理: ${imageFileName}`);
    console.log('='.repeat(60));

    // Intermediate files this function looks for in tmpDir after calling the
    // detection / OCR helpers.
    const panelsJsonPath = path.join(tmpDir, `${imageName}_panels.json`);
    const panelMaskPath = path.join(tmpDir, `${imageName}_panel_mask.png`);
    const textMaskPath = path.join(tmpDir, `${imageName}_text_mask.png`);

    // 1. Detect the comic panels, then load whatever panel list is on disk.
    console.log('🔍 步骤1: 检测漫画格子...');
    detectComicPanels(imagePath, tmpDir, null, projectRoot);
    let panels = readPanelsFile(panelsJsonPath);
    if (panels === null) {
      panels = [];
    } else {
      console.log(`✅ 读取到 ${panels.length} 个格子`);
    }

    // 2. OCR the text and its coordinates.
    console.log('🔤 步骤2: 识别文字及坐标...');
    const ocrResult = ocrComicImage(imagePath, outputDir, true, true, projectRoot);
    // Optional chaining so a missing OCR result is reported as "no dialogues"
    // rather than as a TypeError message.
    const dialogues = ocrResult?.dialogues;
    if (!dialogues || dialogues.length === 0) {
      console.log('⚠️ 未识别到任何对话');
      return { success: false, error: '未识别到任何对话' };
    }

    // 3. With fewer than four panels, retry detection with the text mask and
    //    keep the retry only if it found more panels. Best effort: a failure
    //    here keeps the first result.
    if (panels.length < 4 && fs.existsSync(textMaskPath)) {
      console.log('🔍 步骤2.5: 使用文字遮罩图重新检测格子...');
      try {
        detectComicPanels(imagePath, tmpDir, textMaskPath, projectRoot);
        const retriedPanels = readPanelsFile(panelsJsonPath);
        if (retriedPanels !== null && retriedPanels.length > panels.length) {
          const previousCount = panels.length;
          panels = retriedPanels;
          console.log(`✅ 使用文字遮罩图检测到 ${panels.length} 个格子(之前: ${previousCount} 个)`);
        }
      } catch (error) {
        console.log(`⚠️ 使用文字遮罩图重新检测失败: ${error.message}`);
      }
    }

    // 4. Merge the panel mask and the text mask when both exist.
    console.log('🔗 步骤3: 合并格子遮罩图和文字遮罩图...');
    if (fs.existsSync(panelMaskPath) && fs.existsSync(textMaskPath)) {
      const combinedMaskPath = path.join(tmpDir, `${imageName}_combined_mask.png`);
      mergePanelMaskWithTextMask(panelMaskPath, textMaskPath, combinedMaskPath, projectRoot);
      console.log(`✅ 已合并遮罩图`);
    }

    // 5. Put the dialogues into reading order.
    console.log('📋 步骤4: 整理句子顺序...');
    const { orderedDialogues, imageHeight } = orderDialoguesByPanels(dialogues, panels);

    // 6. Put the characters inside each speech bubble into reading order.
    console.log('📝 步骤5: 整理每个气泡对话框里的文字顺序...');
    const finalDialogues = buildFinalDialogues(orderedDialogues, imageHeight);

    // 7. Write the dialogue JSON.
    console.log('💾 步骤6: 生成对话JSON...');
    generateDialogJson(finalDialogues, imageFileName, outputJsonPath);

    console.log(`✅ 处理完成: ${imageFileName} (${finalDialogues.length} 段对话)`);

    return { success: true, dialogues: finalDialogues.length };
  } catch (error) {
    console.error(`❌ 处理失败: ${path.basename(imagePath)} - ${error.message}`);
    return { success: false, error: error.message };
  }
}
/**
 * Process every image in a directory with `processSingleImage` and print a
 * summary of the outcome.
 *
 * Files are selected by extension (.jpeg, .jpg, .png, .bmp, case-insensitive)
 * and processed in natural file-name order, so `2.jpg` comes before `10.jpg`.
 * Reprocessing of already finished images is forced when the environment
 * variable `FORCE_REPROCESS` is `'true'` or `--force` is on the command line.
 *
 * @param {string|null} [targetDir=null] - Directory to scan. When omitted, the
 *   built-in chapter directory below the project root is used.
 * @returns {{total: number, success: number, skipped: number, failed: number}}
 *   Per-outcome image counts.
 * @throws {Error} When the directory does not exist or cannot be read; the
 *   error is logged and then rethrown.
 */
function batchProcessImages(targetDir = null) {
  try {
    const projectRoot = getProjectRoot();
    const imageDir = targetDir ?? path.join(
      projectRoot,
      'static',
      '漫画',
      'image',
      '鬼-巷第001卷',
      '第一章',
    );

    console.log('='.repeat(60));
    console.log('🚀 批量处理漫画图片');
    console.log('='.repeat(60));
    console.log(`📁 目标目录: ${imageDir}`);

    if (!fs.existsSync(imageDir)) {
      throw new Error(`目录不存在: ${imageDir}`);
    }

    // Collect the image files. A numeric collator keeps page numbers in
    // natural order; a plain sort() would yield 1, 10, 2, ...
    const imageExtensions = new Set(['.jpeg', '.jpg', '.png', '.bmp']);
    const compareNames = new Intl.Collator(undefined, { numeric: true }).compare;
    const imageFiles = fs.readdirSync(imageDir)
      .filter((file) => imageExtensions.has(path.extname(file).toLowerCase()))
      .sort(compareNames)
      .map((file) => path.join(imageDir, file));

    const stats = { total: imageFiles.length, success: 0, skipped: 0, failed: 0 };

    if (imageFiles.length === 0) {
      console.log('⚠️ 未找到任何图片文件');
      return stats;
    }

    console.log(`📊 找到 ${imageFiles.length} 张图片\n`);

    const forceReprocess = process.env.FORCE_REPROCESS === 'true' || process.argv.includes('--force');

    imageFiles.forEach((imagePath, index) => {
      const result = processSingleImage(imagePath, projectRoot, forceReprocess);

      // A skipped image also reports success, so check `skipped` first.
      if (result.skipped) {
        stats.skipped += 1;
      } else if (result.success) {
        stats.success += 1;
      } else {
        stats.failed += 1;
      }

      console.log(`\n进度: ${index + 1}/${imageFiles.length}`);
    });

    console.log('\n' + '='.repeat(60));
    console.log('📊 处理完成统计');
    console.log('='.repeat(60));
    console.log(`✅ 成功: ${stats.success} 张`);
    console.log(`⏭️ 跳过: ${stats.skipped} 张`);
    console.log(`❌ 失败: ${stats.failed} 张`);
    console.log(`📁 总计: ${stats.total} 张`);
    console.log('='.repeat(60));

    return stats;
  } catch (error) {
    console.error('\n' + '='.repeat(60));
    console.error('❌ 批量处理失败:', error.message);
    console.error('='.repeat(60));
    throw error;
  }
}
/**
 * Tell whether this module is the script Node.js was started with (as opposed
 * to being imported by another module).
 *
 * Filesystem paths are compared instead of URL strings: `import.meta.url` is
 * percent-encoded and uses `file:///C:/...` on Windows, so it does not equal
 * `file://` + `process.argv[1]` for paths containing spaces or non-ASCII
 * characters, nor on Windows.
 *
 * @returns {boolean} `true` when the batch should run automatically.
 */
function isRunAsMainScript() {
  const entryScript = process.argv[1];
  if (!entryScript) {
    return false;
  }

  // Kept from the previous check: any entry script with this file name counts.
  if (entryScript.endsWith('generate-subtitle.js')) {
    return true;
  }

  const thisFile = fileURLToPath(import.meta.url);
  if (path.resolve(entryScript) === thisFile) {
    return true;
  }

  try {
    // The entry path may be a symlink while the module URL is its real path.
    return fs.realpathSync(entryScript) === thisFile;
  } catch {
    // Entry path cannot be resolved (e.g. started without a file extension).
    return false;
  }
}

// Run the batch when this file is executed directly
if (isRunAsMainScript()) {
  batchProcessImages();
}
- export { batchProcessImages, processSingleImage };
|