| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490 |
- /**
- * 步骤:
- * 1. 创建startOcrDialogBlockReg()函数
- * 2. 接收参数:imagePath(图片路径)
- * 3. 接收参数:textBlocksJsonPath(文字区域json文件路径)
- * 4. 接收参数:textRegionImagePath(文字区域图片路径)
- * 5. 利用onnxOCR识别图片中的文字区域,通过OCR返回的坐标信息计算出文字区域(绿色框)
- * 6. 保存json文件到指定路径下
- */
import { execFileSync, execSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { getPythonPath } from './python-path.js';
- const __filename = fileURLToPath(import.meta.url);
- const __dirname = path.dirname(__filename);
- const projectRoot = path.join(__dirname, '..');
/**
 * Entry point of the OCR text-block pipeline for a single image.
 *
 * Validates the three required paths, makes sure the directories of both
 * output files exist, runs OCR through performOcrRecognition() and then hands
 * the result to saveJsonToSpecificPath(), which writes the JSON file and the
 * annotated image.
 *
 * @param {string} imagePath - Path of the image to recognise; the file must exist.
 * @param {string} textBlocksJsonPath - Destination of the text-region JSON file.
 * @param {string} textRegionImagePath - Destination of the image with the text regions drawn on it.
 * @param {Object} [ocrConfig={}] - Optional OnnxOCR settings; null/undefined means "no overrides".
 * @returns {Promise<Object>} Whatever saveJsonToSpecificPath() resolves with.
 * @throws {Error} When a required argument is empty, the image is missing, or
 *   any later stage fails. Failures are logged and then rethrown unchanged.
 */
async function startOcrDialogBlockReg(imagePath, textBlocksJsonPath, textRegionImagePath, ocrConfig = {}) {
  try {
    console.log('🔍 开始OCR文字识别');

    if (!imagePath) {
      throw new Error('步骤2失败: imagePath 参数不能为空');
    }

    console.log(`📷 步骤2: 图片路径 - ${imagePath}`);

    if (!fs.existsSync(imagePath)) {
      throw new Error(`步骤2失败: 图片文件不存在 - ${imagePath}`);
    }

    if (!textBlocksJsonPath) {
      throw new Error('步骤3失败: textBlocksJsonPath 参数不能为空');
    }

    console.log(`📄 步骤3: JSON文件路径 - ${textBlocksJsonPath}`);

    if (!textRegionImagePath) {
      throw new Error('步骤4失败: textRegionImagePath 参数不能为空');
    }

    console.log(`🖼️ 步骤4: 图片文件路径 - ${textRegionImagePath}`);

    // The default parameter only covers `undefined`; an explicit null would
    // otherwise crash in Object.keys() below.
    const effectiveConfig = ocrConfig ?? {};

    // The OCR stage writes its intermediate files next to the JSON output.
    const outputDir = path.dirname(textBlocksJsonPath);

    // Create both target directories up front: the JSON file and the annotated
    // image may live in different places, and the drawing stage writes a
    // temporary file next to the image before Python gets a chance to create
    // that directory. mkdirSync with `recursive` is a no-op for existing dirs.
    const requiredDirs = new Set([outputDir, path.dirname(textRegionImagePath)]);
    for (const dir of requiredDirs) {
      fs.mkdirSync(dir, { recursive: true });
    }

    console.log('\n🔍 步骤4: 开始OCR识别和文字区域计算...');
    if (Object.keys(effectiveConfig).length > 0) {
      console.log('⚙️ 使用自定义OCR配置:', effectiveConfig);
    }
    const ocrData = await performOcrRecognition(imagePath, outputDir, effectiveConfig);

    console.log('\n💾 步骤5: 保存结果到指定文件...');
    const savedResult = await saveJsonToSpecificPath(ocrData, textBlocksJsonPath, textRegionImagePath);

    console.log('✅ 所有步骤完成!');
    console.log(`📊 识别到 ${savedResult.totalCount} 个文字块`);
    console.log(`📄 结果已保存: ${savedResult.jsonPath}`);

    return savedResult;
  } catch (error) {
    // Log for the operator, then let the caller decide how to recover.
    console.error(`❌ OCR处理失败: ${error.message}`);
    throw error;
  }
}
/**
 * Run the OnnxOCR Python script on an image and turn its output into text regions.
 *
 * The script is started synchronously with the interpreter returned by
 * getPythonPath(). Afterwards `<imageName>_dialogues.json` is read from
 * `outputDir` and its `dialogues` array is converted with calculateTextRegions().
 *
 * @param {string} imagePath - Image to recognise.
 * @param {string} outputDir - Directory in which the script is expected to write its result file.
 * @param {Object} [ocrConfig={}] - OnnxOCR settings, forwarded to the script as a JSON string.
 * @returns {Promise<Object>} `{ imagePath, imageName, textRegions, originalOcrResult }`.
 * @throws {Error} When the OCR script is missing, the Python process exits with
 *   a failure, the result file never appears, or it lacks a `dialogues` array.
 */
async function performOcrRecognition(imagePath, outputDir, ocrConfig = {}) {
  const pythonEnv = getPythonPath();
  const ocrScript = path.join(projectRoot, 'python', 'generate-anim', 'ocr_with_onnxocr.py');

  if (!fs.existsSync(ocrScript)) {
    throw new Error(`步骤4失败: OCR脚本不存在 - ${ocrScript}`);
  }

  console.log('🤖 调用OnnxOCR引擎...');

  // Python runs with cwd = projectRoot, so hand it absolute paths.
  const absImagePath = path.resolve(imagePath);
  const absOutputDir = path.resolve(outputDir);
  const configJson = JSON.stringify(ocrConfig ?? {});

  // Pass arguments as an argv array instead of building a shell string: the
  // config JSON is full of double quotes, which a quoted shell command would
  // strip or break on, and paths may contain shell metacharacters.
  // NOTE(review): assumes getPythonPath() returns an executable path or a bare
  // command name (not a .bat/.cmd wrapper or a command with arguments) — confirm.
  // The second script argument is deliberately an empty string, exactly as
  // before; its meaning is defined by the Python script.
  execFileSync(pythonEnv, [ocrScript, absImagePath, '', absOutputDir, configJson], {
    stdio: 'inherit',
    cwd: projectRoot,
    env: {
      ...process.env,
      PYTHONIOENCODING: 'utf-8',
      PYTHONUTF8: '1'
    }
  });

  console.log('📍 计算文字区域坐标...');

  const imageName = path.basename(imagePath, path.extname(imagePath));
  const dialoguesJsonPath = path.join(outputDir, `${imageName}_dialogues.json`);

  // The process has already exited; this only tolerates a short delay before
  // the file becomes visible.
  await waitForFileGeneration(dialoguesJsonPath);

  const jsonContent = fs.readFileSync(dialoguesJsonPath, 'utf-8');
  const ocrResult = JSON.parse(jsonContent);

  if (!ocrResult.dialogues || !Array.isArray(ocrResult.dialogues)) {
    throw new Error('步骤4失败: OCR结果格式不正确,缺少dialogues数组');
  }

  const textRegions = calculateTextRegions(ocrResult.dialogues);

  console.log(`✅ 步骤4完成: 识别到 ${textRegions.length} 个文字区域`);

  return {
    imagePath: imagePath,
    imageName: imageName,
    textRegions: textRegions,
    originalOcrResult: ocrResult
  };
}
/**
 * Convert OCR dialogue entries into text-region records with an axis-aligned box.
 *
 * Each entry's `bbox` is read as a list of `[x, y]` points and collapsed into
 * the smallest enclosing rectangle. Entries whose `bbox` is missing, empty or
 * contains a non-numeric coordinate get `bbox: null` instead of
 * Infinity/NaN values, which would not survive JSON serialisation.
 *
 * @param {Array} dialogues - Dialogue entries produced by the OCR step.
 * @returns {Array<Object>} One record per entry:
 *   `{ region_index, text, confidence, bbox, green_box_coordinates }`, where
 *   `region_index` is 1-based and both box fields reference the same object.
 */
function calculateTextRegions(dialogues) {
  return dialogues.map((dialogue, index) => {
    const bbox = computeEnclosingBox(dialogue?.bbox);

    return {
      region_index: index + 1,
      text: dialogue?.text || '',
      confidence: dialogue?.confidence || 0,
      bbox: bbox,
      green_box_coordinates: bbox
    };
  });
}

/**
 * Build the enclosing rectangle (plus derived metrics) of a list of points.
 *
 * @param {*} points - Expected to be an array of `[x, y]` pairs.
 * @returns {Object|null} `{ x1, y1, x2, y2, width, height, center_x, center_y, area }`,
 *   or null when `points` is not a non-empty array of finite coordinates.
 */
function computeEnclosingBox(points) {
  if (!Array.isArray(points) || points.length === 0) {
    return null;
  }

  // Number() mirrors the coercion Math.min/Math.max would apply anyway.
  const xCoords = points.map((point) => Number(point?.[0]));
  const yCoords = points.map((point) => Number(point?.[1]));

  const allFinite = [...xCoords, ...yCoords].every((value) => Number.isFinite(value));
  if (!allFinite) {
    return null;
  }

  const x1 = Math.min(...xCoords);
  const y1 = Math.min(...yCoords);
  const x2 = Math.max(...xCoords);
  const y2 = Math.max(...yCoords);
  const width = x2 - x1;
  const height = y2 - y1;

  return {
    x1: x1,
    y1: y1,
    x2: x2,
    y2: y2,
    width: width,
    height: height,
    center_x: (x1 + x2) / 2,
    center_y: (y1 + y2) / 2,
    area: width * height
  };
}
/**
 * Persist the OCR outcome: write the text-region JSON file, then render the
 * annotated image through drawTextRegionsWithGreenBoxes().
 *
 * @param {Object} ocrData - OCR data; `imagePath` and `textRegions` are read from it.
 * @param {string} textBlocksJsonPath - Where the JSON document is written.
 * @param {string} textRegionImagePath - Where the annotated image is written.
 * @returns {Promise<Object>} `{ jsonPath, textRegionImagePath, outputDir, textRegions, totalCount }`.
 */
async function saveJsonToSpecificPath(ocrData, textBlocksJsonPath, textRegionImagePath) {
  const jsonFileName = path.basename(textBlocksJsonPath);
  console.log(`📝 保存JSON到: ${jsonFileName}`);
  const imageFileName = path.basename(textRegionImagePath);
  console.log(`🖼️ 绘制图片到: ${imageFileName}`);

  // Assemble the document field by field; insertion order is the order in
  // which the keys appear in the written file.
  const payload = {};
  payload.image_file = path.basename(ocrData.imagePath);
  payload.processing_time = new Date().toISOString();
  payload.text_regions = ocrData.textRegions;
  payload.total_count = ocrData.textRegions.length;
  payload.ocr_engine = 'OnnxOCR';
  payload.output_files = {
    json_file: textBlocksJsonPath,
    region_image: textRegionImagePath
  };
  payload.green_box_info = {
    description: '绿色框坐标信息用于标注文字区域',
    coordinate_system: '左上角为原点,x向右递增,y向下递增'
  };

  // The JSON goes to disk first, then the picture is drawn.
  const serialized = JSON.stringify(payload, null, 2);
  fs.writeFileSync(textBlocksJsonPath, serialized, 'utf-8');
  console.log(`✅ JSON文件已保存: ${jsonFileName}`);

  console.log(`🎨 绘制绿色框标注图片...`);
  await drawTextRegionsWithGreenBoxes(ocrData.imagePath, ocrData.textRegions, textRegionImagePath);
  console.log(`✅ 绿色框图片已保存: ${imageFileName}`);

  const summary = {
    jsonPath: textBlocksJsonPath,
    textRegionImagePath,
    outputDir: path.dirname(textBlocksJsonPath),
    textRegions: ocrData.textRegions,
    totalCount: ocrData.textRegions.length
  };
  return summary;
}
/**
 * Render the recognised text regions as green rectangles on a copy of the image.
 *
 * The drawing is done by `python/generate-anim/draw_text_regions.py`, which is
 * generated through createDrawTextRegionsScript() when it does not exist yet.
 * Region data is handed to the script through a temporary JSON file that is
 * removed again whether or not drawing succeeds.
 *
 * @param {string} originalImagePath - Source image.
 * @param {Array} textRegions - Region records as produced by calculateTextRegions().
 * @param {string} outputImagePath - Destination of the annotated image.
 * @returns {Promise<void>}
 * @throws {Error} When the Python process fails or the output image never appears.
 */
async function drawTextRegionsWithGreenBoxes(originalImagePath, textRegions, outputImagePath) {
  const pythonEnv = getPythonPath();
  const drawScript = path.join(projectRoot, 'python', 'generate-anim', 'draw_text_regions.py');

  if (!fs.existsSync(drawScript)) {
    console.log('🔧 创建绘制脚本...');
    await createDrawTextRegionsScript(drawScript);
  }

  // Python runs with cwd = projectRoot, so relative paths must be resolved
  // against this process's working directory before they are handed over.
  const absImagePath = path.resolve(originalImagePath);
  const absOutputPath = path.resolve(outputImagePath);
  const outputDir = path.dirname(absOutputPath);

  // The temp file lives next to the output image; make sure that directory exists.
  fs.mkdirSync(outputDir, { recursive: true });

  // A unique name keeps concurrent runs that share an output directory from
  // overwriting or deleting each other's region data.
  const uniqueSuffix = `${process.pid}_${Date.now()}_${Math.random().toString(36).slice(2)}`;
  const tempJsonPath = path.join(outputDir, `temp_text_regions_${uniqueSuffix}.json`);
  const tempData = {
    image_path: absImagePath,
    text_regions: textRegions
  };

  try {
    fs.writeFileSync(tempJsonPath, JSON.stringify(tempData, null, 2), 'utf-8');

    // An argv array avoids shell quoting problems with unusual path characters.
    // NOTE(review): assumes getPythonPath() returns an executable path or a
    // bare command name (not a .bat/.cmd wrapper) — confirm.
    execFileSync(pythonEnv, [drawScript, tempJsonPath, absOutputPath], {
      stdio: 'inherit',
      cwd: projectRoot,
      env: {
        ...process.env,
        PYTHONIOENCODING: 'utf-8',
        PYTHONUTF8: '1'
      }
    });

    await waitForFileGeneration(absOutputPath);
  } finally {
    // Always remove the temp file, including on failure.
    if (fs.existsSync(tempJsonPath)) {
      fs.unlinkSync(tempJsonPath);
    }
  }
}
/**
 * Write the Python helper script that draws text regions onto an image.
 *
 * The script expects two command-line arguments (a JSON file containing
 * `image_path` and `text_regions`, and the output image path), draws a green
 * rectangle plus a 1-based index label for every region that has a complete
 * bbox, and writes the result PNG-encoded. The parent directory of
 * `scriptPath` is created when missing.
 *
 * The Python source is kept with explicit 4-space indentation, because
 * indentation is significant in Python.
 *
 * @param {string} scriptPath - Where the Python script is written.
 * @returns {Promise<void>}
 */
async function createDrawTextRegionsScript(scriptPath) {
  const scriptContent = `import cv2
import numpy as np
import json
import sys
from pathlib import Path


def draw_text_regions_with_green_boxes(temp_json_path, output_image_path):
    try:
        # 确保输出编码为UTF-8
        sys.stdout.reconfigure(encoding='utf-8')
        sys.stderr.reconfigure(encoding='utf-8')

        print(f"[INFO] 读取文字区域数据: {temp_json_path}")
        with open(temp_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        original_image_path = data['image_path']
        text_regions = data['text_regions']

        print(f"[INFO] 读取原图片: {original_image_path}")

        # 读取原图片(支持中文路径)
        img_array = np.fromfile(str(original_image_path), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

        if img is None:
            raise ValueError(f"无法读取图片: {original_image_path}")

        print(f"[INFO] 图片尺寸: {img.shape[:2][::-1]} (宽x高)")
        print(f"[INFO] 文字区域数量: {len(text_regions)}")

        # 绘制每个文字区域的绿色框
        for i, region in enumerate(text_regions):
            bbox = region.get('bbox') or region.get('green_box_coordinates')
            text = region.get('text', '')

            if bbox and all(k in bbox for k in ['x1', 'y1', 'x2', 'y2']):
                # 提取坐标
                x1, y1 = int(bbox['x1']), int(bbox['y1'])
                x2, y2 = int(bbox['x2']), int(bbox['y2'])

                # 绘制绿色矩形框(BGR格式,绿色为(0, 255, 0))
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # 可选:添加文字标签
                if text and len(text) > 0:
                    # 在框的上方添加文字(如果空间足够)
                    label_y = max(y1 - 5, 15)
                    cv2.putText(img, f"{i+1}", (x1, label_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # 保存结果图片
        output_path = Path(output_image_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # 使用cv2.imencode处理中文路径
        success, encoded_img = cv2.imencode('.png', img)
        if success:
            with open(str(output_path), 'wb') as f:
                f.write(encoded_img.tobytes())
            print(f"[OK] 绿色框图片已保存: {output_path}")
        else:
            raise ValueError("图片编码失败")

        print(f"[SUCCESS] 成功绘制 {len(text_regions)} 个文字区域的绿色框")

    except Exception as e:
        print(f"[ERROR] 绘制绿色框失败: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("Usage: python draw_text_regions.py <temp_json_path> <output_image_path>")
        sys.exit(1)

    temp_json_file = sys.argv[1]
    output_img = sys.argv[2]
    draw_text_regions_with_green_boxes(temp_json_file, output_img)
`;

  // Make sure the target directory exists before writing.
  const scriptDir = path.dirname(scriptPath);
  if (!fs.existsSync(scriptDir)) {
    fs.mkdirSync(scriptDir, { recursive: true });
  }

  fs.writeFileSync(scriptPath, scriptContent, 'utf-8');
  console.log(`✅ 绘制脚本已创建: ${path.basename(scriptPath)}`);
}
/**
 * Poll the file system until a file exists or the retry budget is used up.
 *
 * With the defaults the file is checked up to 50 times, 100 ms apart (about
 * five seconds in total). A file that already exists resolves immediately
 * without sleeping.
 *
 * @param {string} filePath - File to wait for.
 * @param {Object} [options] - Polling settings.
 * @param {number} [options.retries=50] - Maximum number of sleep/check rounds.
 * @param {number} [options.intervalMs=100] - Delay between two checks, in milliseconds.
 * @returns {Promise<void>} Resolves once the file exists.
 * @throws {Error} When the file still does not exist after the last round.
 *   The message wording is shared by every caller of this helper.
 */
async function waitForFileGeneration(filePath, { retries = 50, intervalMs = 100 } = {}) {
  for (let remaining = retries; remaining > 0 && !fs.existsSync(filePath); remaining--) {
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }

  if (!fs.existsSync(filePath)) {
    throw new Error(`步骤4失败: OCR结果文件未生成 - ${filePath}`);
  }
}
/**
 * Produce a complete OnnxOCR configuration by layering caller overrides on
 * top of the built-in defaults. Keys present in `customConfig` win.
 *
 * @param {Object} [customConfig={}] - Settings that replace or extend the defaults.
 * @returns {Object} A new object holding every default plus the overrides.
 */
function createOcrConfig(customConfig = {}) {
  // General switches.
  const generalDefaults = {
    use_angle_cls: true, // angle classifier on
    use_gpu: false // false = run on CPU
  };

  // Text detection settings.
  const detectionDefaults = {
    det_db_thresh: 0.2, // detection threshold
    det_db_box_thresh: 0.5, // box confidence threshold
    det_limit_side_len: 1280, // side length the image is processed at
    det_db_unclip_ratio: 1.5, // box expansion ratio
    det_box_type: 'quad' // box type (quad/poly)
  };

  // Text recognition settings.
  const recognitionDefaults = {
    drop_score: 0.3, // recognition confidence threshold
    rec_image_shape: '3, 48, 320', // recognition input shape
    rec_batch_num: 6, // recognition batch size
    max_text_length: 25 // maximum text length
  };

  // Advanced settings and the preset label.
  const advancedDefaults = {
    use_dilation: false, // dilation off
    det_db_score_mode: 'fast', // score mode (fast/slow)
    preset: 'high_precision' // preset name (balanced/high_precision/fast)
  };

  return {
    ...generalDefaults,
    ...detectionDefaults,
    ...recognitionDefaults,
    ...advancedDefaults,
    ...customConfig
  };
}
/**
 * Look up one of the predefined OCR parameter sets.
 *
 * Only the names defined here (`balanced`, `high_precision`, `fast`) are
 * recognised. Any other value, including names inherited from
 * Object.prototype such as `constructor` or `toString`, yields the
 * `balanced` preset.
 *
 * @param {string} presetName - Preset name (balanced/high_precision/fast).
 * @returns {Object} A freshly created settings object for the preset.
 */
function getPresetConfig(presetName) {
  const presets = {
    // Balanced: the fallback for unknown names.
    balanced: {
      det_db_thresh: 0.3,
      det_db_box_thresh: 0.6,
      det_limit_side_len: 960,
      drop_score: 0.5,
      use_angle_cls: true
    },

    // High precision: lower thresholds and a larger processing size.
    high_precision: {
      det_db_thresh: 0.2,
      det_db_box_thresh: 0.5,
      det_limit_side_len: 1280,
      drop_score: 0.3,
      use_angle_cls: true,
      det_db_unclip_ratio: 1.6
    },

    // Fast: higher thresholds, smaller processing size, no angle classifier.
    fast: {
      det_db_thresh: 0.4,
      det_db_box_thresh: 0.7,
      det_limit_side_len: 640,
      drop_score: 0.6,
      use_angle_cls: false,
      det_db_unclip_ratio: 1.4
    }
  };

  // Check own properties only, so that inherited members of Object.prototype
  // can never be returned as a "preset".
  const isKnownPreset = Object.prototype.hasOwnProperty.call(presets, presetName);
  return isKnownPreset ? presets[presetName] : presets.balanced;
}
- export {
- startOcrDialogBlockReg,
- createOcrConfig,
- getPresetConfig
- };
|