| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214 |
- /**
- * 漫画文字区域坐标检测器
- * 使用专门的漫画文字检测模型获取准确的文字区域坐标
- * 比通用OCR更适合漫画场景
- */
import { execFileSync, execSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { getPythonPath } from './python-path.js';
// ES modules have no built-in __filename/__dirname, so both are derived
// from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Project root: the parent of the directory that contains this module.
const projectRoot = path.join(__dirname, '..');
/**
 * Detects text-region coordinates in an image using the comic-specific detector.
 *
 * Validates the arguments, creates the output directory when it does not
 * exist yet, and delegates the detection itself to runComicTextDetection.
 * Any failure is logged to stderr and then rethrown unchanged.
 *
 * @param {string} imagePath - Path of the input image.
 * @param {string} outputDir - Directory that receives the detector output.
 * @param {Object} [detectorConfig={}] - Optional detector parameter overrides.
 * @returns {Promise<Object>} The detection result returned by runComicTextDetection.
 * @throws {Error} If the image does not exist, outputDir is empty, or detection fails.
 */
async function getTextRegionsCoordinates(imagePath, outputDir, detectorConfig = {}) {
  try {
    console.log('📍 开始检测文字区域坐标(漫画专用检测器)');
    console.log(`📷 输入图片: ${imagePath}`);
    console.log(`📂 输出目录: ${outputDir}`);

    // Argument validation: the image must exist and an output directory must be named.
    const imageExists = Boolean(imagePath) && fs.existsSync(imagePath);
    if (!imageExists) {
      throw new Error(`图片文件不存在: ${imagePath}`);
    }
    if (!outputDir) {
      throw new Error('outputDir 参数不能为空');
    }

    // Create the output directory (including missing parents) on first use.
    const outputDirExists = fs.existsSync(outputDir);
    if (!outputDirExists) {
      fs.mkdirSync(outputDir, { recursive: true });
    }

    // Hand over to the detector.
    const detection = await runComicTextDetection(imagePath, outputDir, detectorConfig);

    console.log('✅ 文字区域坐标检测完成');
    console.log(`📊 检测到 ${detection.total_count} 个文字区域`);

    return detection;
  } catch (error) {
    // Log for the operator, then let the caller see the original error.
    console.error(`❌ 文字区域坐标检测失败: ${error.message}`);
    throw error;
  }
}
/**
 * Runs the comic text detection Python script on one image and loads the
 * JSON files the script writes into the output directory.
 *
 * The interpreter is started with execFileSync and an argument array rather
 * than a shell command string, so paths containing spaces, quotes, `$`,
 * backticks or other shell metacharacters reach the script unchanged and
 * cannot be interpreted as shell syntax.
 *
 * @param {string} imagePath - Path of the input image.
 * @param {string} outputDir - Directory the script writes its results to.
 * @param {Object} detectorConfig - Overrides merged over the default detection parameters.
 * @returns {Promise<Object>} The parsed `<image>_text_regions.json` contents,
 *   extended with `ocr_compatible_data` (parsed `<image>_dialogues.json`, or
 *   null when that file is absent), `detection_source`, `detection_params`
 *   and `output_files`.
 * @throws {Error} If the Python script is missing, the interpreter process
 *   fails, or the text-regions JSON file is not generated.
 */
async function runComicTextDetection(imagePath, outputDir, detectorConfig) {
  const pythonEnv = getPythonPath();
  const pythonScript = path.join(projectRoot, 'python', 'generate-anim', 'detect_comic_text_with_boxes.py');

  if (!fs.existsSync(pythonScript)) {
    throw new Error(`Python脚本不存在: ${pythonScript}`);
  }

  // Default detection parameters (tuned for coordinate detection).
  const defaultParams = {
    inputSize: 1536,        // high-precision processing
    confThresh: 0.3,        // lower confidence threshold, detects more regions
    nmsThresh: 0.25,        // lower NMS threshold, keeps more candidate boxes
    maskThresh: 0.3,
    act: 'leaky',
    refineMode: 1,          // ANNOTATION mode, for more precise boundaries
    keepUndetectedMask: 0,
    erodeIterations: 0,     // no erosion, keep the original size
    invertMask: 0           // mask inversion not needed
  };

  // Caller-supplied values win over the defaults.
  const finalParams = { ...defaultParams, ...detectorConfig };

  // Positional arguments, in the order the script receives them. Every value
  // is passed as its own argv entry, so no quoting or escaping is needed.
  const scriptArgs = [
    pythonScript,
    imagePath,
    outputDir,
    projectRoot,
    finalParams.inputSize,
    finalParams.confThresh,
    finalParams.nmsThresh,
    finalParams.maskThresh,
    finalParams.act,
    finalParams.refineMode,
    finalParams.keepUndetectedMask,
    finalParams.erodeIterations,
    finalParams.invertMask
  ].map(String);

  console.log(`🔍 正在检测文字区域: ${path.basename(imagePath)}`);
  console.log(`⚙️ 检测配置: 精度=${finalParams.inputSize}, 置信度=${finalParams.confThresh}, NMS=${finalParams.nmsThresh}`);

  // Run the detector synchronously without a shell; output goes straight to
  // this process's stdio. execFileSync throws if the process fails.
  execFileSync(pythonEnv, scriptArgs, {
    encoding: 'utf-8',
    stdio: 'inherit',
    cwd: projectRoot,
    env: {
      ...process.env,
      PYTHONIOENCODING: 'utf-8',
      PYTHONUTF8: '1'
    }
  });

  // Result files are named after the image's base name (extension stripped).
  const imageName = path.basename(imagePath, path.extname(imagePath));
  const textRegionsJsonPath = path.join(outputDir, `${imageName}_text_regions.json`);
  const ocrCompatibleJsonPath = path.join(outputDir, `${imageName}_dialogues.json`);

  // Wait for the text-regions file to show up on disk.
  await waitForFileGeneration(textRegionsJsonPath);

  // Detailed text-region data (required).
  const textRegionsData = JSON.parse(fs.readFileSync(textRegionsJsonPath, 'utf-8'));

  // OCR-compatible data (optional; stays null when the file was not written).
  let ocrCompatibleData = null;
  if (fs.existsSync(ocrCompatibleJsonPath)) {
    ocrCompatibleData = JSON.parse(fs.readFileSync(ocrCompatibleJsonPath, 'utf-8'));
  }

  return {
    ...textRegionsData,
    ocr_compatible_data: ocrCompatibleData,
    detection_source: 'comic-text-detector',
    detection_params: finalParams,
    output_files: {
      text_regions_json: textRegionsJsonPath,
      ocr_compatible_json: ocrCompatibleJsonPath
    }
  };
}
/**
 * Builds a detector configuration by layering caller overrides on top of
 * the built-in defaults.
 *
 * The `preset` entry is only carried along as a name; this function does not
 * look up or apply preset values.
 *
 * @param {Object} [customConfig={}] - Values that replace or extend the defaults.
 * @returns {Object} A new configuration object.
 */
function createDetectorConfig(customConfig = {}) {
  return {
    // Basic detection parameters
    inputSize: 1536,     // input size (640/960/1280/1536/2048)
    confThresh: 0.3,     // confidence threshold (0.1-0.8, lower detects more)
    nmsThresh: 0.25,     // NMS threshold (0.1-0.5, lower keeps more overlapping boxes)

    // Advanced parameters
    maskThresh: 0.3,     // segmentation threshold (0.1-0.5)
    act: 'leaky',        // activation function (leaky/relu)
    refineMode: 1,       // refine mode (0 = INPAINT fill, 1 = ANNOTATION)

    // Preset name
    preset: "balanced",  // preset (fast/balanced/precise)

    // Caller overrides come last so they win over every default above.
    ...customConfig
  };
}
/**
 * Returns the detection parameters for a named preset.
 *
 * Only the presets defined in this function are recognised. Any other value
 * (unknown name, undefined, or a name that merely exists on Object.prototype
 * such as "constructor" or "__proto__") yields the balanced preset.
 *
 * @param {string} presetName - Preset name: "fast", "balanced" or "precise".
 * @returns {Object} Preset values: inputSize, confThresh, nmsThresh, refineMode.
 */
function getDetectorPreset(presetName) {
  const presets = {
    // Fast detection (speed first)
    fast: {
      inputSize: 960,
      confThresh: 0.5,
      nmsThresh: 0.4,
      refineMode: 0
    },

    // Balanced detection (default)
    balanced: {
      inputSize: 1280,
      confThresh: 0.3,
      nmsThresh: 0.25,
      refineMode: 1
    },

    // Precise detection (quality first)
    precise: {
      inputSize: 1536,
      confThresh: 0.2,
      nmsThresh: 0.2,
      refineMode: 1
    }
  };

  // Own-property check: a plain `presets[presetName]` lookup would also find
  // inherited members like `constructor` and return those instead of a preset.
  const isKnownPreset = Object.prototype.hasOwnProperty.call(presets, presetName);
  return isKnownPreset ? presets[presetName] : presets.balanced;
}
/**
 * Waits until a file exists on disk, polling at a fixed interval.
 *
 * The file is checked before each pause; once it exists the function returns
 * immediately. With the default options it pauses at most 50 times for
 * 100 ms each before giving up.
 *
 * @param {string} filePath - Path of the file to wait for.
 * @param {Object} [options] - Polling options.
 * @param {number} [options.retries=50] - Maximum number of pauses before giving up.
 * @param {number} [options.intervalMs=100] - Length of each pause in milliseconds.
 * @returns {Promise<void>} Resolves once the file exists.
 * @throws {Error} If the file still does not exist after the last pause.
 */
async function waitForFileGeneration(filePath, { retries = 50, intervalMs = 100 } = {}) {
  for (let remaining = retries; remaining > 0 && !fs.existsSync(filePath); remaining--) {
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }

  // Final check covers both "never appeared" and "retries was 0".
  if (!fs.existsSync(filePath)) {
    throw new Error(`文件未生成: ${filePath}`);
  }
}
- export {
- getTextRegionsCoordinates,
- createDetectorConfig,
- getDetectorPreset
- };
|