text-region-detector.js 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. /**
  2. * 漫画文字区域坐标检测器
  3. * 使用专门的漫画文字检测模型获取准确的文字区域坐标
  4. * 比通用OCR更适合漫画场景
  5. */
import { execFileSync, execSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

import { getPythonPath } from './python-path.js';
  11. const __filename = fileURLToPath(import.meta.url);
  12. const __dirname = path.dirname(__filename);
  13. const projectRoot = path.join(__dirname, '..');
  14. /**
  15. * 获取文字区域坐标(使用漫画专用检测器)
  16. * @param {string} imagePath - 输入图片路径
  17. * @param {string} outputDir - 输出目录
  18. * @param {Object} detectorConfig - 检测器配置参数(可选)
  19. * @returns {Object} 文字区域检测结果
  20. */
  21. async function getTextRegionsCoordinates(imagePath, outputDir, detectorConfig = {}) {
  22. try {
  23. console.log('📍 开始检测文字区域坐标(漫画专用检测器)');
  24. console.log(`📷 输入图片: ${imagePath}`);
  25. console.log(`📂 输出目录: ${outputDir}`);
  26. // 验证参数
  27. if (!imagePath || !fs.existsSync(imagePath)) {
  28. throw new Error(`图片文件不存在: ${imagePath}`);
  29. }
  30. if (!outputDir) {
  31. throw new Error('outputDir 参数不能为空');
  32. }
  33. // 确保输出目录存在
  34. if (!fs.existsSync(outputDir)) {
  35. fs.mkdirSync(outputDir, { recursive: true });
  36. }
  37. // 调用检测器
  38. const result = await runComicTextDetection(imagePath, outputDir, detectorConfig);
  39. console.log('✅ 文字区域坐标检测完成');
  40. console.log(`📊 检测到 ${result.total_count} 个文字区域`);
  41. return result;
  42. } catch (error) {
  43. console.error(`❌ 文字区域坐标检测失败: ${error.message}`);
  44. throw error;
  45. }
  46. }
  47. /**
  48. * 运行漫画文字检测
  49. * @param {string} imagePath - 输入图片路径
  50. * @param {string} outputDir - 输出目录
  51. * @param {Object} detectorConfig - 检测器配置
  52. */
  53. async function runComicTextDetection(imagePath, outputDir, detectorConfig) {
  54. const pythonEnv = getPythonPath();
  55. const pythonScript = path.join(projectRoot, 'python', 'generate-anim', 'detect_comic_text_with_boxes.py');
  56. if (!fs.existsSync(pythonScript)) {
  57. throw new Error(`Python脚本不存在: ${pythonScript}`);
  58. }
  59. // 设置默认检测参数(针对坐标检测优化)
  60. const defaultParams = {
  61. inputSize: 1536, // 高精度处理
  62. confThresh: 0.3, // 较低置信度阈值,检测更多区域
  63. nmsThresh: 0.25, // 较低NMS阈值,保留更多候选框
  64. maskThresh: 0.3,
  65. act: 'leaky',
  66. refineMode: 1, // 使用ANNOTATION模式,获得更精确的边界
  67. keepUndetectedMask: 0,
  68. erodeIterations: 0, // 不腐蚀,保持原始大小
  69. invertMask: 0 // 不需要反转遮罩
  70. };
  71. // 合并用户配置
  72. const finalParams = { ...defaultParams, ...detectorConfig };
  73. // 构建命令
  74. const command = `"${pythonEnv}" "${pythonScript}" "${imagePath}" "${outputDir}" "${projectRoot}" ${finalParams.inputSize} ${finalParams.confThresh} ${finalParams.nmsThresh} ${finalParams.maskThresh} "${finalParams.act}" ${finalParams.refineMode} ${finalParams.keepUndetectedMask} ${finalParams.erodeIterations} ${finalParams.invertMask}`;
  75. console.log(`🔍 正在检测文字区域: ${path.basename(imagePath)}`);
  76. console.log(`⚙️ 检测配置: 精度=${finalParams.inputSize}, 置信度=${finalParams.confThresh}, NMS=${finalParams.nmsThresh}`);
  77. // 执行检测
  78. execSync(command, {
  79. encoding: 'utf-8',
  80. stdio: 'inherit',
  81. cwd: projectRoot,
  82. env: {
  83. ...process.env,
  84. PYTHONIOENCODING: 'utf-8',
  85. PYTHONUTF8: '1'
  86. },
  87. shell: true
  88. });
  89. // 读取检测结果
  90. const imageName = path.basename(imagePath, path.extname(imagePath));
  91. const textRegionsJsonPath = path.join(outputDir, `${imageName}_text_regions.json`);
  92. const ocrCompatibleJsonPath = path.join(outputDir, `${imageName}_dialogues.json`);
  93. // 等待文件生成
  94. await waitForFileGeneration(textRegionsJsonPath);
  95. // 读取详细文字区域数据
  96. const textRegionsData = JSON.parse(fs.readFileSync(textRegionsJsonPath, 'utf-8'));
  97. // 读取OCR兼容格式数据
  98. let ocrCompatibleData = null;
  99. if (fs.existsSync(ocrCompatibleJsonPath)) {
  100. ocrCompatibleData = JSON.parse(fs.readFileSync(ocrCompatibleJsonPath, 'utf-8'));
  101. }
  102. return {
  103. ...textRegionsData,
  104. ocr_compatible_data: ocrCompatibleData,
  105. detection_source: 'comic-text-detector',
  106. detection_params: finalParams,
  107. output_files: {
  108. text_regions_json: textRegionsJsonPath,
  109. ocr_compatible_json: ocrCompatibleJsonPath
  110. }
  111. };
  112. }
  113. /**
  114. * 创建检测器配置
  115. * @param {Object} customConfig - 自定义配置
  116. * @returns {Object} 检测器配置
  117. */
  118. function createDetectorConfig(customConfig = {}) {
  119. const defaultConfig = {
  120. // 基础检测参数
  121. inputSize: 1536, // 输入尺寸(640/960/1280/1536/2048)
  122. confThresh: 0.3, // 置信度阈值(0.1-0.8,越低检测越多)
  123. nmsThresh: 0.25, // NMS阈值(0.1-0.5,越低保留越多重叠框)
  124. // 高级参数
  125. maskThresh: 0.3, // 分割阈值(0.1-0.5)
  126. act: 'leaky', // 激活函数(leaky/relu)
  127. refineMode: 1, // 精炼模式(0=INPAINT填充,1=ANNOTATION标注)
  128. // 预设配置
  129. preset: "balanced" // 预设(fast/balanced/precise)
  130. };
  131. return { ...defaultConfig, ...customConfig };
  132. }
  133. /**
  134. * 获取预设检测配置
  135. * @param {string} presetName - 预设名称
  136. * @returns {Object} 预设配置
  137. */
  138. function getDetectorPreset(presetName) {
  139. const presets = {
  140. // 快速检测(速度优先)
  141. fast: {
  142. inputSize: 960,
  143. confThresh: 0.5,
  144. nmsThresh: 0.4,
  145. refineMode: 0
  146. },
  147. // 平衡检测(默认)
  148. balanced: {
  149. inputSize: 1280,
  150. confThresh: 0.3,
  151. nmsThresh: 0.25,
  152. refineMode: 1
  153. },
  154. // 精确检测(质量优先)
  155. precise: {
  156. inputSize: 1536,
  157. confThresh: 0.2,
  158. nmsThresh: 0.2,
  159. refineMode: 1
  160. }
  161. };
  162. return presets[presetName] || presets.balanced;
  163. }
  164. /**
  165. * 等待文件生成
  166. * @param {string} filePath - 文件路径
  167. */
  168. async function waitForFileGeneration(filePath) {
  169. let retries = 50;
  170. while (retries > 0 && !fs.existsSync(filePath)) {
  171. await new Promise(resolve => setTimeout(resolve, 100));
  172. retries--;
  173. }
  174. if (!fs.existsSync(filePath)) {
  175. throw new Error(`文件未生成: ${filePath}`);
  176. }
  177. }
  178. export {
  179. getTextRegionsCoordinates,
  180. createDetectorConfig,
  181. getDetectorPreset
  182. };