generate-subtitle.js 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
/**
 * Batch-processes comic page images and generates a dialogue JSON file for each one.
 * Handles every image found in the specified directory.
 */
  5. import fs from 'fs';
  6. import path from 'path';
  7. import { fileURLToPath } from 'url';
  8. import { detectComicPanels, mergePanelMaskWithTextMask } from '../check-reg.js';
  9. import { ocrComicImage } from '../ocr.js';
  10. import { sortDialoguesByPanels } from '../sort-dialog.js';
  11. import { sortSentenceCharacters } from '../sort-sentence-character.js';
  12. import { generateDialogJson } from '../generate-dialog-json.js';
  13. // ES 模块中获取 __dirname 的兼容方式
  14. const __filename = fileURLToPath(import.meta.url);
  15. const __dirname = path.dirname(__filename);
  16. /**
  17. * 获取项目根目录
  18. */
  19. function getProjectRoot() {
  20. return path.join(__dirname, '..', '..');
  21. }
  22. /**
  23. * 处理单张图片
  24. */
  25. function processSingleImage(imagePath, projectRoot, forceReprocess = false) {
  26. try {
  27. const imageName = path.basename(imagePath, path.extname(imagePath));
  28. const imageDir = path.dirname(imagePath);
  29. const outputDir = path.join(imageDir, 'ocr');
  30. const tmpDir = path.join(outputDir, 'tmp');
  31. // 确保输出目录和tmp目录存在
  32. if (!fs.existsSync(outputDir)) {
  33. fs.mkdirSync(outputDir, { recursive: true });
  34. }
  35. if (!fs.existsSync(tmpDir)) {
  36. fs.mkdirSync(tmpDir, { recursive: true });
  37. }
  38. // 检查是否已经存在结果文件
  39. const outputJsonPath = path.join(outputDir, `${imageName}_dialogues.json`);
  40. if (fs.existsSync(outputJsonPath) && !forceReprocess) {
  41. console.log(`⏭️ 跳过 ${path.basename(imagePath)} (已存在结果文件)`);
  42. return { success: true, skipped: true };
  43. }
  44. console.log(`\n${'='.repeat(60)}`);
  45. console.log(`📷 正在处理: ${path.basename(imagePath)}`);
  46. console.log('='.repeat(60));
  47. // 1. 检测漫画格子
  48. console.log('🔍 步骤1: 检测漫画格子...');
  49. const panelResult = detectComicPanels(imagePath, tmpDir, null, projectRoot);
  50. // 读取格子信息
  51. const panelsJsonPath = path.join(tmpDir, `${imageName}_panels.json`);
  52. let panels = [];
  53. if (fs.existsSync(panelsJsonPath)) {
  54. const panelsContent = fs.readFileSync(panelsJsonPath, 'utf-8');
  55. const panelsData = JSON.parse(panelsContent);
  56. panels = panelsData.panels || [];
  57. console.log(`✅ 读取到 ${panels.length} 个格子`);
  58. }
  59. const panelMaskPath = path.join(tmpDir, `${imageName}_panel_mask.png`);
  60. // 2. OCR识别文字
  61. console.log('🔤 步骤2: 识别文字及坐标...');
  62. const ocrResult = ocrComicImage(imagePath, outputDir, true, true, projectRoot);
  63. // 获取文字遮罩图路径
  64. const textMaskPath = path.join(tmpDir, `${imageName}_text_mask.png`);
  65. if (!ocrResult.dialogues || ocrResult.dialogues.length === 0) {
  66. console.log('⚠️ 未识别到任何对话');
  67. return { success: false, error: '未识别到任何对话' };
  68. }
  69. // 3. 如果格子数量不足,使用文字遮罩图重新检测
  70. const initialPanelCount = panels.length;
  71. if (panels.length < 4 && fs.existsSync(textMaskPath)) {
  72. console.log('🔍 步骤2.5: 使用文字遮罩图重新检测格子...');
  73. try {
  74. const panelResultRetry = detectComicPanels(imagePath, tmpDir, textMaskPath, projectRoot);
  75. if (fs.existsSync(panelsJsonPath)) {
  76. const panelsContentRetry = fs.readFileSync(panelsJsonPath, 'utf-8');
  77. const panelsDataRetry = JSON.parse(panelsContentRetry);
  78. const panelsRetry = panelsDataRetry.panels || [];
  79. if (panelsRetry.length > initialPanelCount) {
  80. panels = panelsRetry;
  81. console.log(`✅ 使用文字遮罩图检测到 ${panels.length} 个格子(之前: ${initialPanelCount} 个)`);
  82. }
  83. }
  84. } catch (error) {
  85. console.log(`⚠️ 使用文字遮罩图重新检测失败: ${error.message}`);
  86. }
  87. }
  88. // 4. 合并格子遮罩图和文字遮罩图
  89. console.log('🔗 步骤3: 合并格子遮罩图和文字遮罩图...');
  90. if (fs.existsSync(panelMaskPath) && fs.existsSync(textMaskPath)) {
  91. const combinedMaskPath = path.join(tmpDir, `${imageName}_combined_mask.png`);
  92. mergePanelMaskWithTextMask(panelMaskPath, textMaskPath, combinedMaskPath, projectRoot);
  93. console.log(`✅ 已合并遮罩图`);
  94. }
  95. // 5. 整理句子顺序
  96. console.log('📋 步骤4: 整理句子顺序...');
  97. let sortedDialogues = ocrResult.dialogues || [];
  98. // 获取图片尺寸
  99. let imageWidth = 1334;
  100. let imageHeight = 1940;
  101. if (sortedDialogues.length > 0 && sortedDialogues[0].bbox) {
  102. const firstBbox = sortedDialogues[0].bbox;
  103. imageWidth = Math.max(imageWidth, firstBbox.x2 || 1334);
  104. imageHeight = Math.max(imageHeight, firstBbox.y2 || 1940);
  105. }
  106. // 使用格子排序
  107. if (panels.length > 0 && sortedDialogues.length > 0) {
  108. const dialoguesWithBbox = sortedDialogues.filter(d => d.bbox);
  109. if (dialoguesWithBbox.length > 0) {
  110. sortedDialogues = sortDialoguesByPanels(
  111. dialoguesWithBbox,
  112. panels,
  113. imageWidth,
  114. imageHeight
  115. );
  116. }
  117. }
  118. // 6. 整理每个气泡对话框里的文字顺序
  119. console.log('📝 步骤5: 整理每个气泡对话框里的文字顺序...');
  120. // 计算实际图片高度(从所有对话的bbox中找到最大的y2值)
  121. let actualImageHeight = imageHeight;
  122. if (sortedDialogues.length > 0) {
  123. const maxY2 = Math.max(...sortedDialogues
  124. .filter(d => d.bbox && d.bbox.y2)
  125. .map(d => d.bbox.y2));
  126. if (maxY2 > 0) {
  127. actualImageHeight = Math.max(actualImageHeight, maxY2);
  128. }
  129. }
  130. const finalDialogues = sortedDialogues.map((dialogue, index) => {
  131. let sortedText = dialogue.text;
  132. // 使用字符坐标进行排序(如果有的话)
  133. if (dialogue.character_positions && dialogue.character_positions.length > 0) {
  134. sortedText = sortSentenceCharacters(dialogue.text, dialogue.bbox, dialogue.character_positions, actualImageHeight);
  135. } else if (dialogue.bbox) {
  136. // 回退到基于bbox的排序
  137. sortedText = sortSentenceCharacters(dialogue.text, dialogue.bbox, null, actualImageHeight);
  138. }
  139. return {
  140. order: dialogue.order || index + 1,
  141. text: sortedText,
  142. bbox: dialogue.bbox || null,
  143. character_positions: dialogue.character_positions || null
  144. };
  145. });
  146. // 7. 生成对话JSON
  147. console.log('💾 步骤6: 生成对话JSON...');
  148. generateDialogJson(finalDialogues, path.basename(imagePath), outputJsonPath);
  149. console.log(`✅ 处理完成: ${path.basename(imagePath)} (${finalDialogues.length} 段对话)`);
  150. return { success: true, dialogues: finalDialogues.length };
  151. } catch (error) {
  152. console.error(`❌ 处理失败: ${path.basename(imagePath)} - ${error.message}`);
  153. return { success: false, error: error.message };
  154. }
  155. }
  156. /**
  157. * 批量处理目录下的所有图片
  158. */
  159. function batchProcessImages() {
  160. try {
  161. const projectRoot = getProjectRoot();
  162. const targetDir = path.join(
  163. projectRoot,
  164. 'static',
  165. '漫画',
  166. 'image',
  167. '鬼-巷第001卷',
  168. '第一章'
  169. );
  170. console.log('='.repeat(60));
  171. console.log('🚀 批量处理漫画图片');
  172. console.log('='.repeat(60));
  173. console.log(`📁 目标目录: ${targetDir}`);
  174. if (!fs.existsSync(targetDir)) {
  175. throw new Error(`目录不存在: ${targetDir}`);
  176. }
  177. // 读取目录下的所有图片文件
  178. const files = fs.readdirSync(targetDir);
  179. const imageExtensions = ['.jpeg', '.jpg', '.png', '.bmp'];
  180. const imageFiles = files
  181. .filter(file => {
  182. const ext = path.extname(file).toLowerCase();
  183. return imageExtensions.includes(ext);
  184. })
  185. .sort() // 按文件名排序
  186. .map(file => path.join(targetDir, file));
  187. if (imageFiles.length === 0) {
  188. console.log('⚠️ 未找到任何图片文件');
  189. return;
  190. }
  191. console.log(`📊 找到 ${imageFiles.length} 张图片\n`);
  192. const forceReprocess = process.env.FORCE_REPROCESS === 'true' || process.argv.includes('--force');
  193. // 统计信息
  194. let successCount = 0;
  195. let skipCount = 0;
  196. let failCount = 0;
  197. // 处理每张图片
  198. for (let i = 0; i < imageFiles.length; i++) {
  199. const imagePath = imageFiles[i];
  200. const result = processSingleImage(imagePath, projectRoot, forceReprocess);
  201. if (result.skipped) {
  202. skipCount++;
  203. } else if (result.success) {
  204. successCount++;
  205. } else {
  206. failCount++;
  207. }
  208. // 显示进度
  209. console.log(`\n进度: ${i + 1}/${imageFiles.length}`);
  210. }
  211. // 显示总结
  212. console.log('\n' + '='.repeat(60));
  213. console.log('📊 处理完成统计');
  214. console.log('='.repeat(60));
  215. console.log(`✅ 成功: ${successCount} 张`);
  216. console.log(`⏭️ 跳过: ${skipCount} 张`);
  217. console.log(`❌ 失败: ${failCount} 张`);
  218. console.log(`📁 总计: ${imageFiles.length} 张`);
  219. console.log('='.repeat(60));
  220. } catch (error) {
  221. console.error('\n' + '='.repeat(60));
  222. console.error('❌ 批量处理失败:', error.message);
  223. console.error('='.repeat(60));
  224. throw error;
  225. }
  226. }
  227. // 如果直接运行此脚本
  228. if (import.meta.url === `file://${process.argv[1]}` || process.argv[1]?.endsWith('generate-subtitle.js')) {
  229. batchProcessImages();
  230. }
  231. export { batchProcessImages, processSingleImage };