| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682 |
/**
 * 步骤:
 * 1. 新建接收imagePath参数
 * 2. 新建接收textMaskImgPath参数,用来接收输出文字区域识别框图片路径
 * 3. 新建接收textBlocksJsonPath参数,用来接收输出文字区域JSON路径
 * 4. 调用comic-text-detector生成文字区域坐标JSON
 * 5. 复制imagePath,在这个图片基础上,根据json坐标绘制出所有文字区域绿色线框
 * 6. 将带有绿色线框的图片保存到textMaskImgPath路径
 */
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { execFileSync, execSync } from 'child_process';
import { getPythonPath } from './python-path.js';
// ES modules have no built-in __filename / __dirname, so both are derived
// from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Directory one level above this module; the functions below use it as the
// working directory and as the base for the Python helper script paths.
const projectRoot = path.join(__dirname, '..');
/**
 * Builds one detector preset from the five values that actually differ
 * between presets. The remaining fields are identical for every preset:
 *   - act: 'leaky'          activation function (notes list 'leaky' or 'relu')
 *   - refineMode: 0         0 = INPAINT, 1 = ANNOTATION
 *   - keepUndetectedMask: 0 do not keep undetected regions
 *   - invertMask: 1         inverted mask (black text on white background)
 *
 * Key order of the returned object matches the original hand-written
 * literals so serialized output is unchanged.
 *
 * @param {Object} tuning
 * @param {number} tuning.inputSize - Detector input size; larger is more precise but slower.
 * @param {number} tuning.confThresh - Confidence threshold in 0-1; higher is stricter.
 * @param {number} tuning.nmsThresh - NMS threshold in 0-1; higher keeps more overlapping boxes.
 * @param {number} tuning.maskThresh - Mask threshold in 0-1 for the segmentation network.
 * @param {number} tuning.erodeIterations - Integer count; positive erodes (thinner text),
 *   negative dilates (thicker text), 0 disables the morphological pass.
 * @returns {Object} A fresh preset object.
 */
function createDetectionPreset({ inputSize, confThresh, nmsThresh, maskThresh, erodeIterations }) {
  return {
    inputSize,
    confThresh,
    nmsThresh,
    maskThresh,
    act: 'leaky',
    refineMode: 0,
    keepUndetectedMask: 0,
    erodeIterations,
    invertMask: 1,
  };
}

/**
 * Named detection presets. Each value is forwarded, field by field, as a
 * command-line argument to the Python text detector.
 */
const DETECTION_PRESETS = {
  // Standard configuration (the fallback for unknown preset names).
  standard: createDetectionPreset({
    inputSize: 1536,
    confThresh: 0.1,
    nmsThresh: 0.1,
    maskThresh: 0.3,
    erodeIterations: 0,
  }),

  // High detail: larger input and lower thresholds to keep more text detail.
  high_detail: createDetectionPreset({
    inputSize: 2048,
    confThresh: 0.05,
    nmsThresh: 0.2,
    maskThresh: 0.2,
    erodeIterations: -1, // one dilation pass to thicken strokes
  }),

  // Ultra detail: maximises retained detail at the cost of speed.
  ultra_detail: createDetectionPreset({
    inputSize: 3072,
    confThresh: 0.01,
    nmsThresh: 0.3,
    maskThresh: 0.05,
    erodeIterations: -3, // three dilation passes to fill holes in glyphs
  }),

  // Tuned for Japanese text (complex kanji and kana).
  japanese_fine: createDetectionPreset({
    inputSize: 2048,
    confThresh: 0.08,
    nmsThresh: 0.2,
    maskThresh: 0.2,
    erodeIterations: -1, // a single light dilation, keeps glyphs crisp
  }),

  // Sharpness first: less blur, no morphological pass.
  sharp_detail: createDetectionPreset({
    inputSize: 2048,
    confThresh: 0.15,
    nmsThresh: 0.15,
    maskThresh: 0.25,
    erodeIterations: 0,
  }),

  // Very sharp text: high thresholds plus one erosion pass.
  ultra_sharp: createDetectionPreset({
    inputSize: 1536,
    confThresh: 0.3,
    nmsThresh: 0.1,
    maskThresh: 0.4,
    erodeIterations: 1,
  }),

  // Crisp text: smaller input, high thresholds, two erosion passes.
  crisp_text: createDetectionPreset({
    inputSize: 1280,
    confThresh: 0.4,
    nmsThresh: 0.05,
    maskThresh: 0.5,
    erodeIterations: 2,
  }),

  // Extreme sharpening: strongest erosion and highest thresholds.
  extreme_sharp: createDetectionPreset({
    inputSize: 1280,
    confThresh: 0.5,
    nmsThresh: 0.03,
    maskThresh: 0.6,
    erodeIterations: 3,
  }),

  // Fine balance: light erosion with moderate thresholds (thin but visible).
  fine_balance: createDetectionPreset({
    inputSize: 1536,
    confThresh: 0.3,
    nmsThresh: 0.1,
    maskThresh: 0.4,
    erodeIterations: 1,
  }),

  // Subtle sharpening: minimal erosion with tuned thresholds.
  subtle_sharp: createDetectionPreset({
    inputSize: 1536,
    confThresh: 0.25,
    nmsThresh: 0.15,
    maskThresh: 0.35,
    erodeIterations: 1,
  }),

  // Ultra thin text: two erosion passes while staying visible.
  ultra_thin: createDetectionPreset({
    inputSize: 1536,
    confThresh: 0.2,
    nmsThresh: 0.12,
    maskThresh: 0.45,
    erodeIterations: 2,
  }),

  // Super thin text: close to the limit of thinness.
  super_thin: createDetectionPreset({
    inputSize: 1536,
    confThresh: 0.15,
    nmsThresh: 0.08,
    maskThresh: 0.45,
    // Was 2.5 with a note that it gets rounded to 2. An iteration count must
    // be an integer and the value is passed verbatim on the command line, so
    // the intended effective value is stated directly.
    erodeIterations: 2,
  }),
};
/**
 * Entry point of the pipeline: detects text regions in a comic page, writes
 * them as a sorted JSON file, and writes a copy of the page with a green box
 * drawn around every region.
 *
 * Steps: validate the three paths, create the output directories, run the
 * detector (step 4), save the JSON (step 5), draw the boxes (step 6), then
 * verify both output files (step 7).
 *
 * @param {string} imagePath - Input image; must exist.
 * @param {string} textMaskImgPath - Where the image with green boxes is written.
 * @param {string} textBlocksJsonPath - Where the region JSON is written.
 * @param {string|Object} [detectionConfig='ultra_thin'] - Preset name from
 *   DETECTION_PRESETS or a custom configuration object. The default is the
 *   preset this function previously hard-coded.
 * @returns {Promise<{textMaskImgPath: string, textBlocksJsonPath: string, success: boolean}>}
 * @throws {Error} On an empty path argument, a missing input image, or any
 *   failure of a later step (logged, then rethrown unchanged).
 */
async function startComicTextDetector(
  imagePath,
  textMaskImgPath,
  textBlocksJsonPath,
  detectionConfig = 'ultra_thin',
) {
  try {
    console.log('📖 开始生成带绿色线框的文字区域识别图和坐标JSON');
    console.log(`📷 输入图片: ${imagePath}`);
    console.log(`🎯 输出识别框图: ${textMaskImgPath}`);
    console.log(`📄 输出JSON: ${textBlocksJsonPath}`);

    // Steps 1-3: argument validation.
    if (!imagePath) {
      throw new Error('imagePath 参数不能为空');
    }
    if (!fs.existsSync(imagePath)) {
      throw new Error(`图片文件不存在: ${imagePath}`);
    }
    if (!textMaskImgPath) {
      throw new Error('textMaskImgPath 参数不能为空');
    }
    if (!textBlocksJsonPath) {
      throw new Error('textBlocksJsonPath 参数不能为空');
    }

    // Both outputs may live in different directories; make sure each exists.
    const outputDir = path.dirname(textMaskImgPath);
    const jsonOutputDir = path.dirname(textBlocksJsonPath);
    for (const dir of [outputDir, jsonOutputDir]) {
      if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
      }
    }

    // Step 4: the detector writes its raw region file into the image output directory.
    console.log('\n🔍 步骤4: 正在调用文字检测器生成文字区域坐标...');
    const textRegions = await generateTextRegionsOnly(imagePath, outputDir, detectionConfig);

    // Step 5: persist the regions in the OCR-compatible layout.
    console.log('\n📄 步骤5: 生成坐标JSON文件...');
    await saveTextRegionsJson(textRegions, textBlocksJsonPath);

    // Step 6: draw the green boxes on a copy of the original image.
    console.log('\n🎨 步骤6: 在原图片上绘制绿色文字区域识别框...');
    await drawGreenBoxesOnOriginalImage(imagePath, textRegions, textMaskImgPath);

    // Step 7: confirm both artefacts exist and are well-formed.
    console.log('\n💾 步骤7: 验证文件保存...');
    await verifyMaskSaved(textMaskImgPath);
    await verifyJsonSaved(textBlocksJsonPath);

    console.log('✅ 带绿色线框的文字区域识别图和坐标JSON生成完成');
    return {
      textMaskImgPath,
      textBlocksJsonPath,
      success: true,
    };
  } catch (error) {
    console.error(`❌ 带绿色线框的文字区域识别图生成失败: ${error.message}`);
    throw error;
  }
}
/**
 * Step 4: runs the Python comic text detector on one image and returns the
 * detected text blocks.
 *
 * The detector is expected to write `<image name>_text_regions.json` into
 * `outputDir`; that file is read back and its `text_blocks` array returned.
 *
 * @param {string} imagePath - Input image path.
 * @param {string} outputDir - Directory the Python script writes into.
 * @param {string|Object} [detectionConfig='standard'] - Preset name, or an
 *   object whose fields override the `standard` preset. Unknown names and
 *   other value types fall back to `standard`.
 * @returns {Promise<Array>} The `text_blocks` array from the detector output.
 * @throws {Error} If the Python script is missing, the process exits with a
 *   failure, the JSON file is not produced, or it has no `text_blocks` array.
 */
async function generateTextRegionsOnly(imagePath, outputDir, detectionConfig = 'standard') {
  const pythonEnv = getPythonPath();
  const pythonScript = path.join(projectRoot, 'python', 'generate-anim', 'detect_comic_text_with_boxes.py');

  if (!fs.existsSync(pythonScript)) {
    throw new Error(`Python脚本不存在: ${pythonScript}`);
  }

  // Resolve the detection parameters.
  let params;
  if (typeof detectionConfig === 'string') {
    // Own-property check so names like "constructor" are not treated as presets.
    if (Object.prototype.hasOwnProperty.call(DETECTION_PRESETS, detectionConfig)) {
      params = DETECTION_PRESETS[detectionConfig];
      console.log(`📋 使用预设配置: ${detectionConfig}`);
    } else {
      console.warn(`⚠️ 未知的预设配置: ${detectionConfig},使用标准配置`);
      params = DETECTION_PRESETS.standard;
    }
  } else if (typeof detectionConfig === 'object') {
    // Spreading null is a no-op, so a null config yields the standard preset.
    params = { ...DETECTION_PRESETS.standard, ...detectionConfig };
    console.log(`📋 使用自定义配置`);
  } else {
    params = DETECTION_PRESETS.standard;
    console.log(`📋 使用默认标准配置`);
  }

  // Arguments are passed as a vector (no shell), so quotes, spaces, `$` or
  // backticks in paths or custom config values cannot alter the command.
  const scriptArgs = [
    pythonScript,
    imagePath,
    outputDir,
    projectRoot,
    params.inputSize,
    params.confThresh,
    params.nmsThresh,
    params.maskThresh,
    params.act,
    params.refineMode,
    params.keepUndetectedMask,
    params.erodeIterations,
    params.invertMask,
  ].map(String);

  console.log(`🔍 正在检测图片中的文字区域: ${path.basename(imagePath)}`);
  console.log(`⚙️ 检测参数: 尺寸=${params.inputSize}, 置信度=${params.confThresh}, Mask阈值=${params.maskThresh}, 腐蚀=${params.erodeIterations}`);

  // Blocks until the detector exits; throws on a non-zero exit status.
  execFileSync(pythonEnv, scriptArgs, {
    encoding: 'utf-8',
    stdio: 'inherit',
    cwd: projectRoot,
    env: {
      ...process.env,
      PYTHONIOENCODING: 'utf-8',
      PYTHONUTF8: '1',
    },
  });

  const baseImageName = path.basename(imagePath, path.extname(imagePath));
  const textRegionsJsonPath = path.join(outputDir, `${baseImageName}_text_regions.json`);

  // Grace period of up to 50 x 100 ms in case the file becomes visible late.
  for (let remaining = 50; remaining > 0 && !fs.existsSync(textRegionsJsonPath); remaining -= 1) {
    await new Promise((resolve) => setTimeout(resolve, 100));
  }

  if (!fs.existsSync(textRegionsJsonPath)) {
    throw new Error(`步骤4失败: 文字区域坐标文件未生成: ${textRegionsJsonPath}`);
  }

  const textRegionsData = JSON.parse(fs.readFileSync(textRegionsJsonPath, 'utf-8'));
  if (!Array.isArray(textRegionsData?.text_blocks)) {
    // Fail with a descriptive message instead of a TypeError on `.length`.
    throw new Error(`步骤4失败: 文字区域坐标文件缺少 text_blocks 数组: ${textRegionsJsonPath}`);
  }

  console.log(`✅ 文字区域检测完成: 检测到 ${textRegionsData.text_blocks.length} 个区域`);
  return textRegionsData.text_blocks;
}
/**
 * Converts the detector's `<image name>_text_regions.json` (looked up in
 * `outputDir`) into an OCR-style `{ dialogues: [...] }` file.
 *
 * Each block becomes a region with a four-corner bbox, a placeholder text, a
 * fixed confidence of 0.95 and the block's geometry under `region_info`.
 * Regions are ordered right-to-left, then top-to-bottom when their centres
 * are within 50 px horizontally. Nothing in the visible file calls this
 * function.
 *
 * @param {string} imagePath - Original image path (only its base name is used).
 * @param {string} outputDir - Directory containing the detector output.
 * @param {string} targetJsonPath - Where the converted JSON is written.
 * @returns {Promise<Array>} The sorted region objects that were written.
 * @throws {Error} If the detector output does not appear within about 5 s.
 */
async function processCoordinatesJson(imagePath, outputDir, targetJsonPath) {
  const imageStem = path.basename(imagePath, path.extname(imagePath));
  const detectorJsonPath = path.join(outputDir, `${imageStem}_text_regions.json`);

  // Poll for the detector output: at most 50 checks, 100 ms apart.
  for (let attempt = 0; attempt < 50 && !fs.existsSync(detectorJsonPath); attempt += 1) {
    await new Promise((resolve) => setTimeout(resolve, 100));
  }
  if (!fs.existsSync(detectorJsonPath)) {
    throw new Error(`步骤5失败: 文字区域JSON文件未生成: ${detectorJsonPath}`);
  }

  console.log(`📖 读取文字区域数据: ${path.basename(detectorJsonPath)}`);
  const detectorOutput = JSON.parse(fs.readFileSync(detectorJsonPath, 'utf-8'));

  console.log(`🔄 转换为OCR兼容格式...`);
  const blocks = Array.isArray(detectorOutput.text_blocks) ? detectorOutput.text_blocks : [];
  const textRegions = blocks.map((block) => {
    const box = block.bbox;
    return {
      // Corners in order: top-left, top-right, bottom-right, bottom-left.
      bbox: [
        [box.x1, box.y1],
        [box.x2, box.y1],
        [box.x2, box.y2],
        [box.x1, box.y2],
      ],
      text: `[文字区域${block.index}]`, // placeholder text
      confidence: 0.95, // fixed value; the detector's own score is not read here
      source: 'comic-text-detector',
      region_info: {
        width: box.width,
        height: box.height,
        center_x: box.center_x,
        center_y: box.center_y,
        vertical: block.vertical,
        language: block.language,
      },
    };
  });

  console.log(`🔄 对文字区域进行排序(从右到左,从上到下)...`);
  // Centre of a region, from its top-left and bottom-right corners.
  const centerOf = ({ bbox: corners }) => [
    (corners[0][0] + corners[2][0]) / 2,
    (corners[0][1] + corners[2][1]) / 2,
  ];
  textRegions.sort((left, right) => {
    const [leftX, leftY] = centerOf(left);
    const [rightX, rightY] = centerOf(right);
    // Far apart horizontally: rightmost first. Otherwise: topmost first.
    return Math.abs(leftX - rightX) > 50 ? rightX - leftX : leftY - rightY;
  });

  const ocrCompatibleResult = { dialogues: textRegions };

  console.log(`💾 保存OCR兼容JSON: ${path.basename(targetJsonPath)}`);
  fs.writeFileSync(targetJsonPath, JSON.stringify(ocrCompatibleResult, null, 2), 'utf-8');

  console.log(`✅ 转换完成: ${ocrCompatibleResult.dialogues.length} 个文字区域(已按从右到左、从上到下排序)`);
  return textRegions;
}
/**
 * Step 5: converts detector text blocks to the OCR-compatible layout, sorts
 * them into reading order and writes the result as JSON.
 *
 * Each block's `bbox` ({x1, y1, x2, y2}) becomes a `[x1, y1, x2, y2]`
 * rectangle plus a centre point. Ordering is right-to-left; entries whose
 * centres are within 50 px horizontally are ordered top-to-bottom.
 * `region_id` is the 1-based position after sorting.
 *
 * @param {Array} textRegions - Blocks with a `bbox` object and an optional
 *   numeric `confidence`.
 * @param {string} textBlocksJsonPath - Destination JSON file path.
 * @returns {Promise<void>}
 */
async function saveTextRegionsJson(textRegions, textBlocksJsonPath) {
  console.log('🔄 转换为OCR兼容格式...');

  const dialogues = textRegions.map((block, index) => {
    const { x1, y1, x2, y2 } = block.bbox;
    return {
      bbox: [x1, y1, x2, y2], // rectangle form
      center: [(x1 + x2) / 2, (y1 + y2) / 2],
      text: '', // left empty until OCR fills it in
      // `??` rather than `||`: a reported confidence of 0 is a real value and
      // must not be replaced by the 0.8 default.
      confidence: block.confidence ?? 0.8,
      region_id: index + 1, // provisional; renumbered after sorting
      source: 'comic-text-detector',
    };
  });

  console.log('🔄 对文字区域进行排序(从右到左,从上到下)...');
  dialogues.sort((a, b) => {
    // Larger X first when the centres are more than 50 px apart horizontally.
    const xDiff = b.center[0] - a.center[0];
    if (Math.abs(xDiff) > 50) {
      return xDiff;
    }
    // Otherwise smaller Y (higher on the page) first.
    return a.center[1] - b.center[1];
  });

  // Renumber so region_id follows the final reading order.
  dialogues.forEach((dialogue, index) => {
    dialogue.region_id = index + 1;
  });

  const resultJson = {
    image_file: '输入图片',
    dialogues,
    total_count: dialogues.length,
    source: 'comic-text-detector',
    processing_time: new Date().toISOString(),
  };

  console.log(`💾 保存OCR兼容JSON: ${path.basename(textBlocksJsonPath)}`);
  fs.writeFileSync(textBlocksJsonPath, JSON.stringify(resultJson, null, 2), 'utf-8');

  console.log(`✅ 转换完成: ${dialogues.length} 个文字区域(已按从右到左、从上到下排序)`);
}
/**
 * Step 6: draws a green rectangle around every text region on a copy of the
 * original image by invoking the Python drawing script.
 *
 * The script is generated first if it is missing. Regions are handed to it
 * through a temporary JSON file placed next to the output image; that file is
 * removed afterwards whether or not drawing succeeded.
 *
 * @param {string} originalImagePath - Source image path.
 * @param {Array} textRegions - Detector blocks with `bbox` {x1, y1, x2, y2},
 *   `index` and an optional `vertical` flag.
 * @param {string} outputImagePath - Where the annotated image is written.
 * @returns {Promise<void>}
 * @throws {Error} If the Python process exits with a failure status.
 */
async function drawGreenBoxesOnOriginalImage(originalImagePath, textRegions, outputImagePath) {
  const pythonEnv = getPythonPath();
  const drawScript = path.join(projectRoot, 'python', 'generate-anim', 'draw_green_boxes_on_original_image.py');

  if (!fs.existsSync(drawScript)) {
    console.log('📝 创建绘制绿色边框的Python脚本...');
    await createDrawGreenBoxesOnOriginalImageScript(drawScript);
  }

  // Per-call file name so two runs sharing an output directory cannot
  // overwrite or delete each other's region file.
  const tempJsonPath = path.resolve(
    path.dirname(outputImagePath),
    `temp_text_regions_for_drawing_${process.pid}_${Date.now()}.json`,
  );

  // The script expects four corner points per box:
  // [[x1,y1],[x2,y1],[x2,y2],[x1,y2]] (top-left, top-right, bottom-right, bottom-left).
  const pythonFormatRegions = textRegions.map((region) => ({
    bbox: [
      [region.bbox.x1, region.bbox.y1],
      [region.bbox.x2, region.bbox.y1],
      [region.bbox.x2, region.bbox.y2],
      [region.bbox.x1, region.bbox.y2],
    ],
    index: region.index,
    vertical: region.vertical || false,
  }));

  console.log(`🎨 在原图片上绘制 ${textRegions.length} 个绿色文字区域识别框...`);

  try {
    fs.writeFileSync(tempJsonPath, JSON.stringify(pythonFormatRegions, null, 2), 'utf-8');

    // Argument vector instead of a shell string: paths containing quotes,
    // spaces, `$` or backticks are passed through literally.
    execFileSync(
      pythonEnv,
      [drawScript, path.resolve(originalImagePath), tempJsonPath, path.resolve(outputImagePath)],
      {
        encoding: 'utf-8',
        stdio: 'inherit',
        cwd: projectRoot,
        env: {
          ...process.env,
          PYTHONIOENCODING: 'utf-8',
          PYTHONUTF8: '1',
        },
      },
    );

    console.log(`✅ 绿色识别框绘制完成: ${path.basename(outputImagePath)}`);
  } finally {
    // Always remove the temporary region file.
    if (fs.existsSync(tempJsonPath)) {
      fs.unlinkSync(tempJsonPath);
    }
  }
}
/**
 * Writes the Python helper that draws green boxes on an image to
 * `scriptPath`, creating the parent directory when needed.
 *
 * The generated script takes three arguments (image path, regions JSON path,
 * output path), reads and writes through numpy buffers so non-ASCII paths
 * work, and draws one 3 px green rectangle per region. The output encoding
 * now follows the output file's extension (PNG, JPEG or BMP); any other
 * extension falls back to PNG, which was the previous behaviour for all files.
 *
 * @param {string} scriptPath - Destination path of the Python script.
 * @returns {Promise<void>}
 */
async function createDrawGreenBoxesOnOriginalImageScript(scriptPath) {
  const scriptContent = `#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
在原图片上绘制绿色文字区域边框
"""
import cv2
import json
import sys
from pathlib import Path
import numpy as np

# Encoders selectable from the output file extension; anything else falls back to PNG.
SUPPORTED_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.bmp'}


def draw_green_boxes_on_original_image(image_path, regions_json_path, output_path):
    """
    在原图片上绘制绿色边框(支持中文路径)
    """
    # 读取原图片(支持中文路径)
    image_data = np.fromfile(str(image_path), dtype=np.uint8)
    image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError(f"无法读取图片: {image_path}")

    print(f"[INFO] 图片尺寸: {image.shape[1]}x{image.shape[0]}")

    # 读取文字区域JSON
    with open(regions_json_path, 'r', encoding='utf-8') as f:
        text_regions = json.load(f)

    print(f"[INFO] 需要绘制 {len(text_regions)} 个绿色边框")

    # 绘制每个文字区域的绿色边框
    for i, region in enumerate(text_regions):
        bbox = region['bbox']
        # bbox格式: [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
        x1, y1 = int(bbox[0][0]), int(bbox[0][1])
        x2, y2 = int(bbox[2][0]), int(bbox[2][1])

        # 绘制绿色矩形框
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 3)  # 绿色,线宽3

        print(f"[INFO] 绘制区域 {i+1}: ({x1},{y1}) -> ({x2},{y2})")

    # 保存结果(支持中文路径)
    ext = Path(str(output_path)).suffix.lower()
    if ext not in SUPPORTED_EXTENSIONS:
        ext = '.png'
    success, encoded_img = cv2.imencode(ext, image)
    if success:
        encoded_img.tofile(str(output_path))
        print(f"[SUCCESS] 已保存带绿色边框的图片: {output_path}")
    else:
        raise RuntimeError(f"保存图片失败: {output_path}")


def main():
    if len(sys.argv) != 4:
        print("用法: python draw_green_boxes_on_original_image.py <原图片路径> <区域JSON路径> <输出图片路径>")
        sys.exit(1)

    image_path = Path(sys.argv[1])
    regions_json_path = Path(sys.argv[2])
    output_path = Path(sys.argv[3])

    try:
        draw_green_boxes_on_original_image(image_path, regions_json_path, output_path)
    except Exception as e:
        print(f"[ERROR] 绘制失败: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
`;

  // Make sure the script's directory exists before writing.
  const scriptDir = path.dirname(scriptPath);
  if (!fs.existsSync(scriptDir)) {
    fs.mkdirSync(scriptDir, { recursive: true });
  }

  fs.writeFileSync(scriptPath, scriptContent, 'utf-8');
  console.log(`✅ Python绘制脚本已创建: ${path.basename(scriptPath)}`);
}
/**
 * Step 7: verifies that the image with green boxes exists at
 * `textMaskImgPath`, is a regular file and is not empty.
 *
 * The path is polled before giving up, with defaults of 50 checks 100 ms
 * apart (about 5 s), matching the previously hard-coded values.
 *
 * @param {string} textMaskImgPath - Path of the annotated image.
 * @param {Object} [options]
 * @param {number} [options.retries=50] - Maximum number of waits.
 * @param {number} [options.intervalMs=100] - Delay between checks in milliseconds.
 * @returns {Promise<void>}
 * @throws {Error} If the path never appears, is not a regular file, or is empty.
 */
async function verifyMaskSaved(textMaskImgPath, { retries = 50, intervalMs = 100 } = {}) {
  for (let remaining = retries; remaining > 0 && !fs.existsSync(textMaskImgPath); remaining -= 1) {
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }

  if (!fs.existsSync(textMaskImgPath)) {
    throw new Error(`步骤7失败: 带绿色边框的图片未保存到指定路径: ${textMaskImgPath}`);
  }

  const stats = fs.statSync(textMaskImgPath);
  // A directory has a non-zero size and would otherwise pass the check below.
  if (!stats.isFile()) {
    throw new Error(`步骤7失败: 输出路径不是图片文件: ${textMaskImgPath}`);
  }
  if (stats.size === 0) {
    throw new Error(`步骤7失败: 生成的图片文件为空: ${textMaskImgPath}`);
  }

  console.log(`✅ 步骤7完成: 带绿色边框的图片已保存到 ${path.basename(textMaskImgPath)} (${Math.round(stats.size / 1024)}KB)`);
}
/**
 * Step 7: verifies that the coordinate JSON exists, parses, and contains a
 * `dialogues` array.
 *
 * @param {string} jsonPath - Path of the JSON file to check.
 * @returns {Promise<void>}
 * @throws {Error} If the file is missing, cannot be read or parsed (the
 *   underlying error is attached as `cause`), or has no `dialogues` array.
 */
async function verifyJsonSaved(jsonPath) {
  if (!fs.existsSync(jsonPath)) {
    throw new Error(`步骤7失败: 坐标JSON未保存到指定路径: ${jsonPath}`);
  }

  let data;
  try {
    data = JSON.parse(fs.readFileSync(jsonPath, 'utf-8'));
  } catch (error) {
    // Keep the original read/parse error reachable for debugging.
    throw new Error(`步骤7失败: 坐标JSON文件格式错误: ${error.message}`, { cause: error });
  }

  // Shape check is outside the try block so it is not caught and re-wrapped;
  // `?.` makes a top-level `null` report the same format error rather than a
  // TypeError message.
  if (!Array.isArray(data?.dialogues)) {
    throw new Error('步骤7失败: 坐标JSON文件格式错误: JSON格式不正确');
  }

  console.log(`✅ 步骤7完成: 坐标JSON已保存到 ${path.basename(jsonPath)} (${data.dialogues.length}个区域)`);
}
- export { startComicTextDetector, DETECTION_PRESETS };
|